diff --git a/.github/workflows/pre-merge.yaml b/.github/workflows/pre-merge.yaml index dc02e07..9af66db 100644 --- a/.github/workflows/pre-merge.yaml +++ b/.github/workflows/pre-merge.yaml @@ -58,3 +58,17 @@ jobs: go-version: ${{ env.GO_VERSION }} - name: Run tests run: go test -v ${{ matrix.modules }}/... + + build-e2e: + # The on-hardware integration harnesses are not run in CI (they need real + # hardware), but they are built so a change to the adapters cannot silently + # break them. + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + - name: Setup Go + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Build e2e harnesses + run: make build-e2e diff --git a/.gitignore b/.gitignore index f007973..1ccdb19 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Binaries for programs and plugins +/bin/ *.exe *.exe~ *.dll diff --git a/Makefile b/Makefile index c2dd57a..b743e1c 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,21 @@ .DEFAULT_GOAL := all +BIN_DIR := bin + +# The e2e harnesses are deployed to RAID hosts running Rocky Linux 8 or 9 on +# x86-64. Building them as statically linked (CGO disabled) linux/amd64 binaries +# makes them depend on no glibc at all, so a single binary runs on both releases +# (Rocky 8 ships glibc 2.28, Rocky 9 ships glibc 2.34). Override on the command +# line for other targets, e.g. `make build-e2e E2E_GOARCH=arm64`. +E2E_GOOS ?= linux +E2E_GOARCH ?= amd64 +E2E_BUILD_ENV := CGO_ENABLED=0 GOOS=$(E2E_GOOS) GOARCH=$(E2E_GOARCH) + # TODO .PHONY: lint lint: @echo "Linting..." - golangci-lint run -c .golangci.yaml ./... + golangci-lint run -c .golangci.yaml ./... @echo "Lint done" # TODO @@ -14,6 +25,24 @@ tests: go test -v ./... @echo "Tests done" +# Build the on-hardware integration harnesses as static linux/amd64 binaries +# (Rocky Linux 8/9 compatible, see E2E_* above). 
These are manual, destructive +# tools (see tests/integration/README.md); building them just verifies they +# still compile. +.PHONY: build-e2e-mdadm +build-e2e-mdadm: + @echo "Building mdadm e2e harness ($(E2E_GOOS)/$(E2E_GOARCH))..." + $(E2E_BUILD_ENV) go build -o $(BIN_DIR)/mdadm-e2e ./tests/integration/mdadm + +.PHONY: build-e2e-storcli2 +build-e2e-storcli2: + @echo "Building storcli2 e2e harness ($(E2E_GOOS)/$(E2E_GOARCH))..." + $(E2E_BUILD_ENV) go build -o $(BIN_DIR)/storcli2-e2e ./tests/integration/storcli2 + +.PHONY: build-e2e +build-e2e: build-e2e-mdadm build-e2e-storcli2 + @echo "e2e harnesses built" + .PHONY: all -all: lint tests +all: lint tests build-e2e @echo "All done" diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..1d0ec6d --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,64 @@ +# Integration / e2e harnesses + +Manual, on-hardware harnesses for the RAID controller adapters. Each adapter has +its own clearly-identifiable sibling directory, all built as standalone +`package main` programs: + +| Directory | Adapter | Hardware | +|---|---|---| +| [`mdadm/`](mdadm/) | software RAID (RHEL8 / mdadm) | NVMe drives + `mdadm` | +| [`storcli2/`](storcli2/) | MegaRAID / PERC (storcli2 / perccli2) | controller + `storcli2`/`perccli2` binary | + +These are **not** part of `go test` or CI: they shell out to real tools and +mutate real storage. Run them by hand on a host with the right hardware. + +## mdadm + +Runs a fixed destructive RAID0/RAID1/RAID10 suite (create, add/remove drives, +delete) against `/dev/nvme*` devices: + +```sh +go run ./tests/integration/mdadm +``` + +## storcli2 + +Argument-driven. A bare invocation is **read-only** (inventory as markdown +tables); destructive commands run only with `-confirm`. 
+ +```sh +# read-only inventory (default) +go run ./tests/integration/storcli2 + +# full destructive cycle: create -> assert remove unsupported -> expand -> delete +go run ./tests/integration/storcli2 scenario -raid=1 -drives=252:0,252:1 -add-drives=252:2 -confirm + +# individual destructive tasks +go run ./tests/integration/storcli2 create -raid=1 -drives=252:0,252:1 -confirm +go run ./tests/integration/storcli2 add -vd=0 -drives=252:2 -confirm +go run ./tests/integration/storcli2 delete -vd=0 -confirm +``` + +Flags: `-binary` (default `/opt/MegaRAID/storcli2/storcli2`, set to the +`perccli2` path for PERC), `-controller` (default `0`), `-raid`, `-drives`, +`-add-drives`, `-vd`, `-confirm`. Drives are addressed by their `EID:Slt` id. + +> storcli2 cannot remove drives from a volume, so the `scenario` exercises +> removal as a negative case (asserts `ErrFunctionNotSupportedByImplementation`) +> rather than mutating the array. + +### Building for the RAID hosts + +The harnesses only shell out to the vendor binaries, so they cross-compile +freely. The Makefile builds them as **statically linked `linux/amd64`** binaries +into `bin/`, which depend on no glibc and therefore run on both Rocky Linux 8 +(glibc 2.28) and Rocky Linux 9 (glibc 2.34): + +```sh +make build-e2e # both harnesses -> bin/{mdadm-e2e,storcli2-e2e} +make build-e2e-storcli2 # storcli2 only +``` + +Override the target for other hosts via the `E2E_GOOS` / `E2E_GOARCH` variables, +e.g. `make build-e2e E2E_GOARCH=arm64`. Then copy the binary to the target host +and run it there. 
diff --git a/tests/integration/main.go b/tests/integration/mdadm/main.go similarity index 100% rename from tests/integration/main.go rename to tests/integration/mdadm/main.go diff --git a/tests/integration/software_raid_controller.go b/tests/integration/mdadm/software_raid_controller.go similarity index 100% rename from tests/integration/software_raid_controller.go rename to tests/integration/mdadm/software_raid_controller.go diff --git a/tests/integration/storcli2/hardware_raid_controller.go b/tests/integration/storcli2/hardware_raid_controller.go new file mode 100644 index 0000000..0c15603 --- /dev/null +++ b/tests/integration/storcli2/hardware_raid_controller.go @@ -0,0 +1,258 @@ +//nolint:mnd // Integration tests, no need for constants +package main + +import ( + "context" + "fmt" + "log/slog" + "strings" + + "github.com/pkg/errors" + + "github.com/scality/raidmgmt/pkg/core" + "github.com/scality/raidmgmt/pkg/domain/entities/logicalvolume" + "github.com/scality/raidmgmt/pkg/domain/entities/physicaldrive" + "github.com/scality/raidmgmt/pkg/domain/entities/raidcontroller" + "github.com/scality/raidmgmt/pkg/domain/ports" +) + +// HardwareRAIDControllerTester drives the storcli2/perccli2 composition adapter +// against real hardware. Inventory is read-only; create/add/delete/scenario are +// destructive and are only reached once main has seen an explicit confirmation. +type HardwareRAIDControllerTester struct { + controller core.RAIDController + controllerID int + logger *slog.Logger +} + +func NewHardwareRAIDControllerTester( + controller core.RAIDController, + controllerID int, + logger *slog.Logger, +) *HardwareRAIDControllerTester { + return &HardwareRAIDControllerTester{ + controller: controller, + controllerID: controllerID, + logger: logger, + } +} + +// Inventory reads the controllers, physical drives and logical volumes and +// prints them as markdown tables. It mutates nothing. 
+func (t *HardwareRAIDControllerTester) Inventory(ctx context.Context) error { + l := t.logger.With(slog.String("command", "inventory")) + + controllers, err := t.controller.Controllers() + if err != nil { + return errors.Wrap(err, "failed to get controllers") + } + + ctrlMetadata := &raidcontroller.Metadata{ID: t.controllerID} + + physicalDrives, err := t.controller.PhysicalDrives(ctrlMetadata) + if err != nil { + return errors.Wrap(err, "failed to get physical drives") + } + + logicalVolumes, err := t.controller.LogicalVolumes(ctrlMetadata) + if err != nil { + return errors.Wrap(err, "failed to get logical volumes") + } + + printControllers(controllers) + printPhysicalDrives(physicalDrives) + printLogicalVolumes(logicalVolumes) + + l.InfoContext(ctx, "inventory complete", + slog.Int("controllers", len(controllers)), + slog.Int("physical_drives", len(physicalDrives)), + slog.Int("logical_volumes", len(logicalVolumes)), + ) + + return nil +} + +// Create creates a logical volume from the given RAID level and drives. +func (t *HardwareRAIDControllerTester) Create( + ctx context.Context, + level logicalvolume.RAIDLevel, + drives []*physicaldrive.Metadata, +) (*logicalvolume.LogicalVolume, error) { + l := t.logger.With(slog.String("command", "create")) + + request := &logicalvolume.Request{ + CtrlMetadata: &raidcontroller.Metadata{ID: t.controllerID}, + RAIDLevel: level, + PDrivesMetadata: drives, + // storcli2 has no IO policy; Direct satisfies the request validation and + // is ignored by the adapter. 
+ CacheOptions: &logicalvolume.CacheOptions{ + ReadPolicy: logicalvolume.ReadPolicyReadAhead, + WritePolicy: logicalvolume.WritePolicyWriteBack, + IOPolicy: logicalvolume.IOPolicyDirect, + }, + Name: "raidmgmt_e2e", + } + + logicalVolume, err := t.controller.CreateLV(request) + if err != nil { + return nil, errors.Wrap(err, "failed to create logical volume") + } + + l.InfoContext(ctx, "created logical volume", + slog.String("id", logicalVolume.ID), + slog.String("raid_level", logicalVolume.RAIDLevel.String()), + slog.String("device_path", logicalVolume.DevicePath), + ) + + return logicalVolume, nil +} + +// Add expands the given volume with the given drives (online capacity +// expansion). +func (t *HardwareRAIDControllerTester) Add( + ctx context.Context, + vdID string, + drives []*physicaldrive.Metadata, +) error { + l := t.logger.With(slog.String("command", "add")) + + metadata := &logicalvolume.Metadata{ + CtrlMetadata: &raidcontroller.Metadata{ID: t.controllerID}, + ID: vdID, + } + + if err := t.controller.AddPDsToLV(metadata, drives...); err != nil { + return errors.Wrapf(err, "failed to expand logical volume %s", vdID) + } + + l.InfoContext(ctx, "expanded logical volume", slog.String("id", vdID)) + + return nil +} + +// Delete deletes (clears) the given volume. +func (t *HardwareRAIDControllerTester) Delete(ctx context.Context, vdID string) error { + l := t.logger.With(slog.String("command", "delete")) + + metadata := &logicalvolume.Metadata{ + CtrlMetadata: &raidcontroller.Metadata{ID: t.controllerID}, + ID: vdID, + } + + if err := t.controller.DeleteLV(metadata); err != nil { + return errors.Wrapf(err, "failed to delete logical volume %s", vdID) + } + + l.InfoContext(ctx, "deleted logical volume", slog.String("id", vdID)) + + return nil +} + +// Scenario runs the full create -> assert-remove-unsupported -> (optional) +// expand -> delete cycle, leaving the controller as it was found. 
Drive removal +// is asserted to be unsupported on storcli2, so it is exercised as a negative +// case rather than a mutation. +func (t *HardwareRAIDControllerTester) Scenario( + ctx context.Context, + level logicalvolume.RAIDLevel, + drives []*physicaldrive.Metadata, + addDrives []*physicaldrive.Metadata, +) (err error) { + l := t.logger.With(slog.String("command", "scenario")) + + logicalVolume, err := t.Create(ctx, level, drives) + if err != nil { + return err + } + + defer func() { + if deleteErr := t.Delete(ctx, logicalVolume.ID); deleteErr != nil && err == nil { + err = deleteErr + } + }() + + removeErr := t.controller.DeletePDsFromLV(logicalVolume.Metadata, drives[0]) + if !errors.Is(removeErr, ports.ErrFunctionNotSupportedByImplementation) { + return errors.Errorf("expected drive removal to be unsupported, got: %v", removeErr) + } + + l.InfoContext(ctx, "drive removal is unsupported as expected") + + if len(addDrives) > 0 { + if err := t.Add(ctx, logicalVolume.ID, addDrives); err != nil { + return err + } + + expanded, err := t.controller.LogicalVolume(logicalVolume.Metadata) + if err != nil { + return errors.Wrap(err, "failed to re-read expanded logical volume") + } + + l.InfoContext(ctx, "expanded logical volume drive count", + slog.Int("drives", len(expanded.PDrivesMetadata)), + ) + } + + l.InfoContext(ctx, "scenario passed") + + return err +} + +// printControllers prints the controllers as a markdown table. +func printControllers(controllers []*raidcontroller.RAIDController) { + fmt.Println("\n### Controllers") + fmt.Println("| ID | Name | Serial | JBOD supported | JBOD enabled |") + fmt.Println("|---|---|---|---|---|") + + for _, c := range controllers { + fmt.Printf("| %d | %s | %s | %t | %t |\n", + c.ID, c.Name, c.Serial, c.IsJBODSupported, c.IsJBODEnabled) + } +} + +// printPhysicalDrives prints the physical drives as a markdown table. 
+func printPhysicalDrives(drives []*physicaldrive.PhysicalDrive) { + fmt.Println("\n### Physical drives") + fmt.Println("| ID | Slot | Model | Size | Type | Status | JBOD |") + fmt.Println("|---|---|---|---|---|---|---|") + + for _, d := range drives { + fmt.Printf("| %s | %s | %s | %s | %s | %s | %t |\n", + d.ID, d.Slot.String(), d.Model, humanBytes(d.Size), d.Type, d.Status, d.JBOD) + } +} + +// printLogicalVolumes prints the logical volumes as a markdown table. +func printLogicalVolumes(volumes []*logicalvolume.LogicalVolume) { + fmt.Println("\n### Logical volumes") + fmt.Println("| ID | RAID | Status | Size | Drives | Device path |") + fmt.Println("|---|---|---|---|---|---|") + + for _, v := range volumes { + ids := make([]string, 0, len(v.PDrivesMetadata)) + for _, pd := range v.PDrivesMetadata { + ids = append(ids, pd.ID) + } + + fmt.Printf("| %s | %s | %s | %s | %s | %s |\n", + v.ID, v.RAIDLevel.String(), v.Status, humanBytes(v.Size), + strings.Join(ids, " "), v.DevicePath) + } +} + +// humanBytes renders a byte count in binary units for readable tables. +func humanBytes(b uint64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%d B", b) + } + + div, exp := uint64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + + return fmt.Sprintf("%.2f %ciB", float64(b)/float64(div), "KMGTPE"[exp]) +} diff --git a/tests/integration/storcli2/main.go b/tests/integration/storcli2/main.go new file mode 100644 index 0000000..229e0a6 --- /dev/null +++ b/tests/integration/storcli2/main.go @@ -0,0 +1,176 @@ +// Command storcli2-e2e is a manual on-hardware harness for the storcli2/perccli2 +// adapter. By default it prints a read-only inventory; destructive commands +// (create, add, delete, scenario) run only when -confirm is given. 
+// +// go run ./tests/integration/storcli2 # inventory (read-only) +// go run ./tests/integration/storcli2 scenario \ +// -drives=252:0,252:1 -add-drives=252:2 -confirm # full destructive cycle +// go run ./tests/integration/storcli2 create -raid=1 -drives=252:0,252:1 -confirm +// go run ./tests/integration/storcli2 add -vd=0 -drives=252:2 -confirm +// go run ./tests/integration/storcli2 delete -vd=0 -confirm +// +// Cross-compile for a target host (the binary shells out to storcli2/perccli2): +// +// GOOS=linux GOARCH=amd64 go build -o storcli2-e2e ./tests/integration/storcli2 +package main + +import ( + "context" + "flag" + "log/slog" + "os" + "strings" + + "github.com/pkg/errors" + + "github.com/scality/raidmgmt/pkg/core" + "github.com/scality/raidmgmt/pkg/domain/entities/logicalvolume" + "github.com/scality/raidmgmt/pkg/domain/entities/physicaldrive" + "github.com/scality/raidmgmt/pkg/domain/entities/raidcontroller" + "github.com/scality/raidmgmt/pkg/implementation/commandrunner" + raidadapter "github.com/scality/raidmgmt/pkg/implementation/raidcontroller" +) + +func main() { + logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)). + With(slog.String("test_type", "e2e"), slog.String("adapter", "storcli2")) + + ctx := context.Background() + + // The command is the first non-flag argument; everything else are flags. + // Defaulting to "inventory" keeps a bare invocation read-only. + args := os.Args[1:] + command := "inventory" + + if len(args) > 0 && !strings.HasPrefix(args[0], "-") { + command, args = args[0], args[1:] + } + + fs := flag.NewFlagSet(command, flag.ExitOnError) + binary := fs.String("binary", commandrunner.StorCLI2Path, "path to the storcli2/perccli2 binary") + controllerID := fs.Int("controller", 0, "controller index") + raidLevel := fs.String("raid", "1", "RAID level for create/scenario: 0|1|10") + drivesArg := fs.String("drives", "", "comma-separated EID:Slt drive ids (e.g. 
252:0,252:1)") + addDrivesArg := fs.String("add-drives", "", "comma-separated EID:Slt drive ids to expand with (scenario)") + vdID := fs.String("vd", "", "virtual drive id for add/delete") + confirm := fs.Bool("confirm", false, "required to run a destructive command") + + if err := fs.Parse(args); err != nil { + logger.ErrorContext(ctx, "failed to parse arguments", slog.Any("error", err)) + os.Exit(1) + } + + runner := commandrunner.NewStorCLI2(binary) + controller := core.NewRAIDController(raidadapter.NewStorCLI2(runner)) + tester := NewHardwareRAIDControllerTester(*controller, *controllerID, logger) + + err := dispatch(ctx, tester, command, dispatchOptions{ + controllerID: *controllerID, + raidLevel: *raidLevel, + drives: *drivesArg, + addDrives: *addDrivesArg, + vdID: *vdID, + confirm: *confirm, + }) + if err != nil { + logger.ErrorContext(ctx, "command failed", slog.String("command", command), slog.Any("error", err)) + os.Exit(1) + } +} + +type dispatchOptions struct { + controllerID int + raidLevel string + drives string + addDrives string + vdID string + confirm bool +} + +// dispatch routes a command to the tester. inventory is read-only; every other +// command is destructive and requires confirmation. 
+func dispatch( + ctx context.Context, + tester *HardwareRAIDControllerTester, + command string, + opts dispatchOptions, +) error { + if command == "inventory" { + return tester.Inventory(ctx) + } + + if !opts.confirm { + return errors.Errorf("refusing to run destructive command %q without -confirm", command) + } + + switch command { + case "create": + level, err := parseRAIDLevel(opts.raidLevel) + if err != nil { + return err + } + + _, err = tester.Create(ctx, level, parseDrives(opts.drives, opts.controllerID)) + + return err + case "add": + if opts.vdID == "" { + return errors.New("add requires -vd") + } + + return tester.Add(ctx, opts.vdID, parseDrives(opts.drives, opts.controllerID)) + case "delete": + if opts.vdID == "" { + return errors.New("delete requires -vd") + } + + return tester.Delete(ctx, opts.vdID) + case "scenario": + level, err := parseRAIDLevel(opts.raidLevel) + if err != nil { + return err + } + + return tester.Scenario( + ctx, + level, + parseDrives(opts.drives, opts.controllerID), + parseDrives(opts.addDrives, opts.controllerID), + ) + default: + return errors.Errorf("unknown command %q (want: inventory, create, add, delete, scenario)", command) + } +} + +// parseRAIDLevel maps a "0"/"1"/"10" string to a RAIDLevel. +func parseRAIDLevel(level string) (logicalvolume.RAIDLevel, error) { + parsed := logicalvolume.RAIDLevelMap(level) + if parsed == logicalvolume.RAIDLevelUnknown { + return parsed, errors.Errorf("invalid RAID level %q (want: 0, 1 or 10)", level) + } + + return parsed, nil +} + +// parseDrives splits a comma-separated "EID:Slt" list into drive metadata for +// the given controller. Empty entries are skipped. 
+func parseDrives(arg string, controllerID int) []*physicaldrive.Metadata { + ctrlMetadata := &raidcontroller.Metadata{ID: controllerID} + + parts := strings.Split(arg, ",") + drives := make([]*physicaldrive.Metadata, 0, len(parts)) + + for _, id := range parts { + id = strings.TrimSpace(id) + if id == "" { + continue + } + + drives = append(drives, &physicaldrive.Metadata{ + CtrlMetadata: ctrlMetadata, + ID: id, + }) + } + + return drives +}