From 0563bf79997064e7d3359ad841aed3e3e028d611 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 14 May 2026 20:23:16 +0200 Subject: [PATCH] feat(tests/integration): Phase 0 harness + smoke (envtest + linstor CLI) Build the Tier 2 integration-test scaffold per docs/test-strategy.md so the 12 Phase 1 group agents have a working harness to extend. The scaffold boots envtest + the full controller-runtime manager + the LINSTOR REST server on a free port, drives it with the real upstream `linstor` CLI, and ships an in-process satellite mock that writes the healthy steady-state Status fields (Ready, FreeCapacity, UpToDate) without shelling out. Files all guarded by `//go:build integration` so `go test ./...` without the tag is unaffected. Co-Authored-By: Claude Signed-off-by: Andrei Kvapil --- tests/integration/harness/asserts.go | 148 ++++++++ tests/integration/harness/concurrent.go | 83 +++++ tests/integration/harness/csi.go | 72 ++++ tests/integration/harness/envtest.go | 139 ++++++++ tests/integration/harness/fixtures.go | 183 ++++++++++ tests/integration/harness/linstor.go | 199 +++++++++++ tests/integration/harness/manager.go | 428 ++++++++++++++++++++++++ tests/integration/harness/satellite.go | 367 ++++++++++++++++++++ tests/integration/smoke_test.go | 74 ++++ 9 files changed, 1693 insertions(+) create mode 100644 tests/integration/harness/asserts.go create mode 100644 tests/integration/harness/concurrent.go create mode 100644 tests/integration/harness/csi.go create mode 100644 tests/integration/harness/envtest.go create mode 100644 tests/integration/harness/fixtures.go create mode 100644 tests/integration/harness/linstor.go create mode 100644 tests/integration/harness/manager.go create mode 100644 tests/integration/harness/satellite.go create mode 100644 tests/integration/smoke_test.go diff --git a/tests/integration/harness/asserts.go b/tests/integration/harness/asserts.go new file mode 100644 index 0000000..22d1082 --- /dev/null +++ b/tests/integration/harness/asserts.go @@ -0,0 +1,148 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "context" + "testing" + "time" + + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" +) + +// Eventually polls predicate until it returns true or the timeout +// elapses. Fails the test via t.Fatalf on timeout. The default +// poll interval is 100ms — short enough that a 30s budget gives +// 300 attempts, long enough that the apiserver isn't hammered. +func Eventually(t *testing.T, timeout time.Duration, predicate func() bool, msg string) { + t.Helper() + + const pollInterval = 100 * time.Millisecond + + deadline := time.Now().Add(timeout) + + for { + if predicate() { + return + } + + if time.Now().After(deadline) { + t.Fatalf("Eventually timed out after %s: %s", timeout, msg) + } + + time.Sleep(pollInterval) + } +} + +// MustList returns the .Items slice of the given list-type. The +// caller passes a pointer to an empty list (e.g. &blockstoriov1alpha1.NodeList{}) +// and a function that extracts the Items. We keep the interface +// explicit rather than reflection-based: every CRD has a slightly +// different Items field type, so any "generic" wrapper still needs +// a per-kind extractor. +// +// Example: +// +// nodes := MustList(t, c, &blockstoriov1alpha1.NodeList{}, +// func(l *blockstoriov1alpha1.NodeList) []blockstoriov1alpha1.Node { return l.Items }) +func MustList[L client.ObjectList, T any]( + t *testing.T, + c client.Client, + list L, + items func(L) []T, + opts ...client.ListOption, +) []T { + t.Helper() + + err := c.List(context.Background(), list, opts...) + if err != nil { + t.Fatalf("List %T: %v", list, err) + } + + return items(list) +} + +// MustGet fetches the named cluster-scoped object into `into`. For +// namespaced objects callers should fall back to the raw +// client.Client.Get — the integration suite is overwhelmingly +// cluster-scoped (CRDs are scope=Cluster), so this terse form +// covers the common case. +func MustGet[T client.Object](t *testing.T, c client.Client, name string, into T) T { + t.Helper() + + err := c.Get(context.Background(), types.NamespacedName{Name: name}, into) + if err != nil { + t.Fatalf("Get %s/%s: %v", typeNameOf(into), name, err) + } + + return into +} + +// WaitForDRBDState polls until the named Resource on the given +// node has Status.DrbdState == want. The satellite mock fills +// this in on each tick — see harness/satellite.go. +func WaitForDRBDState(t *testing.T, stack *Stack, rd, node, want string) { + t.Helper() + + const drbdStateTimeout = 15 * time.Second + + // Resource.metadata.name follows `.` (CEL-pinned). + resourceName := rd + "." + node + + Eventually(t, drbdStateTimeout, func() bool { + var r blockstoriov1alpha1.Resource + + err := stack.Env.Client.Get(context.Background(), + types.NamespacedName{Name: resourceName}, &r) + if err != nil { + return false + } + + return r.Status.DrbdState == want + }, "Resource "+resourceName+" DrbdState != "+want) +} + +func typeNameOf(obj client.Object) string { + gvk := obj.GetObjectKind().GroupVersionKind() + if gvk.Kind != "" { + return gvk.Kind + } + // Fallback: Go type name. Concrete types unset GVK until + // the scheme decodes them, which never happens on a hand- + // allocated `&Foo{}` test object. + switch obj.(type) { + case *blockstoriov1alpha1.Node: + return "Node" + case *blockstoriov1alpha1.StoragePool: + return "StoragePool" + case *blockstoriov1alpha1.ResourceGroup: + return "ResourceGroup" + case *blockstoriov1alpha1.ResourceDefinition: + return "ResourceDefinition" + case *blockstoriov1alpha1.Resource: + return "Resource" + case *blockstoriov1alpha1.Snapshot: + return "Snapshot" + default: + return "Object" + } +} diff --git a/tests/integration/harness/concurrent.go b/tests/integration/harness/concurrent.go new file mode 100644 index 0000000..00fd040 --- /dev/null +++ b/tests/integration/harness/concurrent.go @@ -0,0 +1,83 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "fmt" + "runtime/debug" + "sync" + "testing" +) + +// RunParallel spawns n goroutines, calls body(i) on each, waits +// for all to finish, and propagates any panic back to t.Fatal. +// The goroutine-storm shape exists for Group L +// (`concurrent_test.go`) where we exercise reconcile races against +// the apiserver. +// +// Panics are captured per-goroutine and reported under a stable +// banner; the first panic short-circuits the test, the rest are +// logged via t.Log so the operator can see the full picture. +func RunParallel(t *testing.T, n int, body func(i int)) { + t.Helper() + + if n <= 0 { + t.Fatalf("RunParallel: n must be positive, got %d", n) + } + + var wg sync.WaitGroup + + panics := make([]string, n) + wg.Add(n) + + for i := range n { + go func(idx int) { + defer wg.Done() + defer func() { + rec := recover() + if rec != nil { + panics[idx] = fmt.Sprintf("goroutine %d panic: %v\n%s", + idx, rec, debug.Stack()) + } + }() + + body(idx) + }(i) + } + + wg.Wait() + + first := "" + + for i := range panics { + if panics[i] == "" { + continue + } + + if first == "" { + first = panics[i] + } else { + t.Log(panics[i]) + } + } + + if first != "" { + t.Fatal(first) + } +} diff --git a/tests/integration/harness/csi.go b/tests/integration/harness/csi.go new file mode 100644 index 0000000..8a79ca1 --- /dev/null +++ b/tests/integration/harness/csi.go @@ -0,0 +1,72 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "net/url" + "testing" + + lapi "github.com/LINBIT/golinstor/client" + + csidriver "github.com/cozystack/blockstor/pkg/csi-driver" +) + +// CSI exposes blockstor's CSI shim wired to the in-process REST +// server. The project does NOT ship a gRPC CSI server today — +// `pkg/csi-driver.Driver` is a behaviour-bearing adapter that the +// real linstor-csi sidecar wraps in `csi.ControllerServer`. The +// docs/test-strategy.md scaffold table calls for in-process CSI +// gRPC; until the project actually grows that binary, Phase 0 +// exposes the Driver directly. Phase 1 Group J builds on this +// surface. +// +// The TODO is tracked in the PR body's "caveats" section. +type CSI struct { + // Driver is the same Driver linstor-csi uses in production — + // see pkg/csi-driver/driver.go. + Driver *csidriver.Driver + + // Client is the underlying golinstor REST client, exposed for + // tests that need direct lapi-level operations not yet + // proxied through Driver (e.g. ListSnapshots envelope checks + // for Bug 201 in Phase 1 Group J). + Client *lapi.Client +} + +// NewCSI builds the CSI surface against `stack.RestURL`. Cheap; +// no goroutines spawned. The Driver re-uses one lapi.Client across +// the test, matching the linstor-csi sidecar's lifecycle. +func NewCSI(t *testing.T, stack *Stack) *CSI { + t.Helper() + + u, err := url.Parse(stack.RestURL) + if err != nil { + t.Fatalf("parse REST URL %q: %v", stack.RestURL, err) + } + + c, err := lapi.NewClient(lapi.BaseURL(u)) + if err != nil { + t.Fatalf("lapi.NewClient: %v", err) + } + + return &CSI{ + Driver: &csidriver.Driver{Client: c}, + Client: c, + } +} diff --git a/tests/integration/harness/envtest.go b/tests/integration/harness/envtest.go new file mode 100644 index 0000000..9c4b281 --- /dev/null +++ b/tests/integration/harness/envtest.go @@ -0,0 +1,139 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package harness wires the Tier 2 integration-test stack: envtest +// (in-process kube-apiserver+etcd) → controller-runtime manager with +// every reconciler we ship → blockstor REST server on a free port → +// in-process satellite mock writing Status fields. See +// docs/test-strategy.md for the tier architecture and +// tests/integration/README.md for how to run. +package harness + +import ( + "os" + "path/filepath" + "runtime" + "testing" + "time" + + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" +) + +// Env is the booted envtest stack: a live in-process kube-apiserver +// + etcd, the controller-runtime config to talk to it, and a typed +// client preloaded with our scheme. Stop is registered via +// t.Cleanup; callers should not invoke it directly. +type Env struct { + Cfg *rest.Config + Client client.Client + Stop func() +} + +// Start boots envtest with the blockstor CRDs from +// config/crd/bases/. It reads KUBEBUILDER_ASSETS (the path the +// controller-runtime setup-envtest tool prints) and fails the test +// with a clear message if unset — there is no useful fallback, +// because the envtest harness requires the etcd + kube-apiserver +// binaries to launch. +// +// The returned Env is ready for use; teardown is registered via +// t.Cleanup. Each call boots a fresh apiserver, so tests are +// trivially isolated. +func Start(t *testing.T) *Env { + t.Helper() + + assets := os.Getenv("KUBEBUILDER_ASSETS") + if assets == "" { + t.Fatalf("KUBEBUILDER_ASSETS is unset; run `setup-envtest use --print path 1.34.x` and export it before running integration tests (see tests/integration/README.md)") + } + + // scheme carries both the core clientgo types and the blockstor + // CRD group, mirroring cmd/controller/main.go's init(). + scheme := clientgoscheme.Scheme + utilruntime.Must(blockstoriov1alpha1.AddToScheme(scheme)) + + testEnv := &envtest.Environment{ + CRDDirectoryPaths: []string{crdBasesPath(t)}, + ErrorIfCRDPathMissing: true, + BinaryAssetsDirectory: assets, + } + + cfg, err := testEnv.Start() + if err != nil { + t.Fatalf("envtest start: %v", err) + } + + envClient, err := client.New(cfg, client.Options{Scheme: scheme}) + if err != nil { + _ = testEnv.Stop() + + t.Fatalf("envtest client: %v", err) + } + + stop := func() { + // envtest's Stop is best-effort; a 30s budget mirrors the + // kubebuilder default Eventually timeout used in + // internal/controller/suite_test.go. + const stopBudget = 30 * time.Second + + deadline := time.Now().Add(stopBudget) + + for { + err := testEnv.Stop() + if err == nil { + return + } + + if time.Now().After(deadline) { + t.Logf("envtest stop after deadline: %v", err) + + return + } + + time.Sleep(time.Second) + } + } + t.Cleanup(stop) + + return &Env{Cfg: cfg, Client: envClient, Stop: stop} +} + +// crdBasesPath resolves the absolute path to config/crd/bases/ +// regardless of which sub-package's working directory the test +// runs in. Anchors the lookup on this source file so go test's +// per-package CWD doesn't break the path. +func crdBasesPath(t *testing.T) string { + t.Helper() + + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + t.Fatalf("runtime.Caller failed; cannot locate CRD bases") + } + + // thisFile = /tests/integration/harness/envtest.go + // Walk up four levels: harness → integration → tests → . + repoRoot := filepath.Join(filepath.Dir(thisFile), "..", "..", "..") + + return filepath.Join(repoRoot, "config", "crd", "bases") +} diff --git a/tests/integration/harness/fixtures.go b/tests/integration/harness/fixtures.go new file mode 100644 index 0000000..13b9f1e --- /dev/null +++ b/tests/integration/harness/fixtures.go @@ -0,0 +1,183 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "context" + "testing" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" +) + +// Canonical fixture node names. Kept as named constants (not a +// slice) so other harness helpers can reference them by symbol. +const ( + NodeWorker1 = "worker-1" + NodeWorker2 = "worker-2" + NodeWorker3 = "worker-3" + + // FixtureDefaultRG is the canonical resource-group every wave-1 + // SC targets. Spawn-from-RG tests assume this exists with + // placeCount=2. + FixtureDefaultRG = "default" + + // providerLVMThin / providerZFSThin / providerFile mirror the + // kebab-case pool names a real LINSTOR/blockstor deployment uses + // for these provider kinds. + providerLVMThin = "lvm-thin" + providerZFSThin = "zfs-thin" + providerFile = "file" + + // satellitePortDefault is the upstream-LINSTOR satellite plain- + // text port. Same value the production manifests use. + satellitePortDefault int32 = 3366 +) + +// FixtureNodes returns the canonical 3-node cluster names. A +// function (not a `var`) so the linter is happy and so tests +// always get a fresh slice they can mutate without surprise. +func FixtureNodes() []string { + return []string{NodeWorker1, NodeWorker2, NodeWorker3} +} + +// FixtureProvider is the (provider-kind, pool-name) pair the +// fixture seeds on each node. +type FixtureProvider struct { + Kind string + PoolName string +} + +// FixtureProviders returns the canonical provider list per node: +// one LVM_THIN pool, one ZFS_THIN, one FILE. Total: 9 SPs across +// 3 nodes. +func FixtureProviders() []FixtureProvider { + return []FixtureProvider{ + {Kind: providerLVMThinUpper, PoolName: providerLVMThin}, + {Kind: providerZFSThinUpper, PoolName: providerZFSThin}, + {Kind: "FILE", PoolName: providerFile}, + } +} + +// SeedThreeNodeCluster creates the canonical fixture: 3 Nodes, 9 +// StoragePools (3 providers × 3 nodes), and one default RG. Safe +// to call multiple times — AlreadyExists is treated as success so +// per-group tests that re-seed don't have to special-case +// "previous test left this here". +func SeedThreeNodeCluster(t *testing.T, stack *Stack) { + t.Helper() + + ctx := context.Background() + cli := stack.Env.Client + + for _, name := range FixtureNodes() { + seedNode(ctx, t, cli, name) + } + + for _, name := range FixtureNodes() { + for _, prov := range FixtureProviders() { + seedStoragePool(ctx, t, cli, name, prov.PoolName, prov.Kind) + } + } + + seedDefaultResourceGroup(ctx, t, cli) +} + +func seedNode(ctx context.Context, t *testing.T, cli client.Client, name string) { + t.Helper() + + node := &blockstoriov1alpha1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: blockstoriov1alpha1.NodeSpec{ + Type: "SATELLITE", + NetInterfaces: []blockstoriov1alpha1.NodeNetInterface{ + { + Name: "default", + Address: "10.0.0." + nodeOctet(name), + SatellitePort: satellitePortDefault, + SatelliteEncryptionType: "PLAIN", + }, + }, + }, + } + + err := cli.Create(ctx, node) + if err != nil && !apierrors.IsAlreadyExists(err) { + t.Fatalf("create Node %q: %v", name, err) + } +} + +func seedStoragePool(ctx context.Context, t *testing.T, cli client.Client, node, pool, kind string) { + t.Helper() + + pool2 := &blockstoriov1alpha1.StoragePool{ + // Composite name pinned by the CEL XValidation rule on + // StoragePool — keep `.` exactly. + ObjectMeta: metav1.ObjectMeta{Name: pool + "." + node}, + Spec: blockstoriov1alpha1.StoragePoolSpec{ + NodeName: node, + PoolName: pool, + ProviderKind: kind, + }, + } + + err := cli.Create(ctx, pool2) + if err != nil && !apierrors.IsAlreadyExists(err) { + t.Fatalf("create StoragePool %q: %v", pool2.Name, err) + } +} + +func seedDefaultResourceGroup(ctx context.Context, t *testing.T, cli client.Client) { + t.Helper() + + resourceGroup := &blockstoriov1alpha1.ResourceGroup{ + ObjectMeta: metav1.ObjectMeta{Name: FixtureDefaultRG}, + Spec: blockstoriov1alpha1.ResourceGroupSpec{ + Description: "harness default RG (placeCount=2)", + SelectFilter: blockstoriov1alpha1.ResourceGroupSelectFilter{ + PlaceCount: 2, + }, + }, + } + + err := cli.Create(ctx, resourceGroup) + if err != nil && !apierrors.IsAlreadyExists(err) { + t.Fatalf("create ResourceGroup %q: %v", resourceGroup.Name, err) + } +} + +// nodeOctet returns the last octet (`1`, `2`, `3`) for the +// canonical 10.0.0. address derived from the node name. +// Falls back to "1" for any unrecognised name so the helper is +// not load-bearing on the naming scheme. +func nodeOctet(name string) string { + switch name { + case NodeWorker1: + return "1" + case NodeWorker2: + return "2" + case NodeWorker3: + return "3" + default: + return "1" + } +} diff --git a/tests/integration/harness/linstor.go b/tests/integration/harness/linstor.go new file mode 100644 index 0000000..4aacb25 --- /dev/null +++ b/tests/integration/harness/linstor.go @@ -0,0 +1,199 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "os/exec" + "strings" + "testing" + "time" +) + +// CLI invokes the upstream `linstor` Python client against the +// blockstor REST URL. We intentionally exec the real CLI binary +// instead of mocking it — the Tier 2 contract pins the wire shape +// blockstor returns, and the cheapest way to catch regressions +// against the actual Python parser (which has crashed us before +// with `xml.etree.ElementTree.ParseError` etc.) is to make it +// parse our responses for real. +type CLI struct { + // URL is the controllers base URL — e.g. http://127.0.0.1:NNNN. + URL string + + // Binary, if non-empty, overrides the binary name `linstor`. + // Tests should rarely set this; CI installs the canonical + // `linstor-client` Debian package. + Binary string + + // Timeout caps each invocation. Defaults to 30s if zero. + Timeout time.Duration +} + +// ErrLinstorBinaryMissing is returned (via t.Fatal) when no +// `linstor` binary is on PATH. Carries an actionable message so +// the operator running `go test` locally without the CLI knows +// what to install. +var ErrLinstorBinaryMissing = errors.New("linstor binary not on PATH; install `linstor-client` (Debian) or skip with -short") + +// Run invokes `linstor --controllers --machine-readable +// ` and returns stdout. The test fails on any of: +// - non-zero exit +// - python traceback in stderr (matched by the same pattern +// `tests/e2e/client-compat.sh` uses) +// - HTTPConnectionPool / xml ParseError fragments that the +// Python json/REST layer emits when blockstor returns +// something it can't decode (Bug-59 class). +// +// stderr is folded into the failure message so the operator sees +// what went wrong without re-running with -v. +func (c *CLI) Run(t *testing.T, args ...string) []byte { + t.Helper() + + binary := c.binary() + + _, err := exec.LookPath(binary) + if err != nil { + t.Fatalf("%v (looked for %q): %v", ErrLinstorBinaryMissing, binary, err) + } + + full := append([]string{"--controllers", c.URL, "--machine-readable"}, args...) + + ctx, cancel := context.WithTimeout(context.Background(), c.timeout()) + defer cancel() + + cmd := exec.CommandContext(ctx, binary, full...) + + var ( + stdout bytes.Buffer + stderr bytes.Buffer + ) + + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err = cmd.Run() + checkFatalErrors(t, args, err, stderr.String()) + + return stdout.Bytes() +} + +// JSON runs the CLI and json.Unmarshals stdout into a flat list +// of objects. The `--machine-readable` envelope is actually +// nested: `[[obj, obj, ...]]` (outer array = "one response per +// command", inner array = the resource list itself). We flatten +// one level deep so tests get the natural `[]map` shape. +// +// For single-object endpoints (e.g. `controller version`) the CLI +// emits `[{...}]`; this helper detects that and returns the flat +// shape too. +func (c *CLI) JSON(t *testing.T, args ...string) []map[string]any { + t.Helper() + + const logCap = 512 + + out := c.Run(t, args...) + + // Try the most common case first: an array whose first element + // is itself an array (the LINSTOR list-envelope). + var nested [][]map[string]any + + errNested := json.Unmarshal(out, &nested) + if errNested == nil { + flat := make([]map[string]any, 0) + for _, sub := range nested { + flat = append(flat, sub...) + } + + return flat + } + + // Fall back to the flat shape: `[obj, obj, ...]`. + var flat []map[string]any + + errFlat := json.Unmarshal(out, &flat) + if errFlat == nil { + return flat + } + + t.Fatalf("linstor %s: unmarshal JSON (len=%d) into either [[obj...]] or [obj...]: stdout: %s", + strings.Join(args, " "), len(out), truncateForLog(out, logCap)) + + return nil +} + +func (c *CLI) binary() string { + if c.Binary != "" { + return c.Binary + } + + return "linstor" +} + +func (c *CLI) timeout() time.Duration { + const defaultTimeout = 30 * time.Second + + if c.Timeout > 0 { + return c.Timeout + } + + return defaultTimeout +} + +// checkFatalErrors centralises the failure-mode classifier the +// Run wrapper applies. Pulled out so the hot path stays readable +// and so we can extend it later with new "this means linstor is +// broken" patterns without churning Run's signature. +func checkFatalErrors(t *testing.T, args []string, err error, stderrText string) { + t.Helper() + + // Patterns lifted from tests/e2e/client-compat.sh — the + // historical "linstor exited 0 but blew up the parser" class. + fatalPatterns := []string{ + "Traceback (most recent call last)", + "xml.etree.ElementTree.ParseError", + "HTTPConnectionPool", + "json.decoder.JSONDecodeError", + } + + for _, p := range fatalPatterns { + if strings.Contains(stderrText, p) { + t.Fatalf("linstor %s: stderr contains %q\nstderr: %s", + strings.Join(args, " "), p, stderrText) + } + } + + if err != nil { + t.Fatalf("linstor %s: %v\nstderr: %s", + strings.Join(args, " "), err, stderrText) + } +} + +// truncateForLog clamps the dumped bytes so a runaway HTML error +// page doesn't blow up CI logs. +func truncateForLog(buf []byte, limit int) string { + if len(buf) <= limit { + return string(buf) + } + + return string(buf[:limit]) + "...[truncated]" +} diff --git a/tests/integration/harness/manager.go b/tests/integration/harness/manager.go new file mode 100644 index 0000000..a1ba3c8 --- /dev/null +++ b/tests/integration/harness/manager.go @@ -0,0 +1,428 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + "testing" + "time" + + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/manager" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" + "github.com/cozystack/blockstor/internal/controller" + "github.com/cozystack/blockstor/pkg/rest" + storek8s "github.com/cozystack/blockstor/pkg/store/k8s" +) + +const ( + // managerShutdownBudget caps how long t.Cleanup waits for the + // manager goroutine to exit before logging the leak. + managerShutdownBudget = 30 * time.Second + + // healthzPollInterval is how often waitForHealthz retries the + // /v1/healthz GET while the REST server is coming up. + healthzPollInterval = 50 * time.Millisecond + + // healthzReadyTimeout caps the whole "wait until REST is up" + // budget. + healthzReadyTimeout = 30 * time.Second + + // restNamespace is the namespace pkg/rest.Server uses for the + // few native-object endpoints (passphrase Secret, etc.). + // Cluster-scoped CRDs ignore it; we pick `default` because + // envtest pre-creates that namespace. + restNamespace = "default" +) + +// errHealthzUnready is the sentinel returned by pingHealthz when +// the server answered with a non-200/204 status. Static so err113 +// can see it (and so a test can match with errors.Is if it ever +// wants to). +var errHealthzUnready = errors.New("healthz unready") + +// Stack is the fully-booted Tier 2 integration target: an envtest +// kube-apiserver, the controller-runtime manager with every +// reconciler wired in, the LINSTOR-compatible REST server, and the +// in-process satellite mock. RestURL is `http://127.0.0.1:` — +// the URL `linstor --controllers` accepts. +type Stack struct { + Env *Env + RestURL string + Manager manager.Manager + Satellite *Satellite +} + +// StartStack composes Env + Manager + REST + Satellite. Layout +// mirrors cmd/controller/main.go's manager bootstrap so the +// production wire-up and the test wire-up cannot drift on which +// reconciler exists. +// +// Concurrency: the manager runs in its own goroutine; the REST +// server is registered as a manager.Runnable so it shuts down with +// the manager. t.Cleanup cancels the root context and waits for +// the manager to exit before tearing down envtest. +func StartStack(t *testing.T) *Stack { + t.Helper() + + env := Start(t) + + // Pre-flight: blockstor's CRDs install validating webhooks + // via CEL XValidation rules (see e.g. StoragePool's + // `metadata.name == poolName.nodeName`). envtest already + // applied them when Start returned. Nothing extra to do. + + mgr, err := buildIntegrationManager(env) + if err != nil { + t.Fatalf("build manager: %v", err) + } + + st := storek8s.New(mgr.GetClient()) + + // Wire every reconciler / runnable cmd/controller/main.go + // registers. Mirror order exactly so a future split-or-merge + // of the controller binary cannot surprise these tests. + err = wireReconcilers(mgr, st) + if err != nil { + t.Fatalf("wire reconcilers: %v", err) + } + + restURL, err := mountREST(mgr, st) + if err != nil { + t.Fatalf("mount REST: %v", err) + } + + sat := NewSatellite(mgr.GetClient()) + + err = mgr.Add(sat) + if err != nil { + t.Fatalf("add satellite mock: %v", err) + } + + ctx, cancel := context.WithCancel(context.Background()) + + done := make(chan error, 1) + go func() { + done <- mgr.Start(ctx) + }() + + t.Cleanup(func() { + cancel() + // Bounded wait so a hung Start can't deadlock the test + // suite — envtest.Stop runs from Env's Cleanup next and + // will reclaim the apiserver regardless. + select { + case err := <-done: + if err != nil { + t.Logf("manager exited: %v", err) + } + case <-time.After(managerShutdownBudget): + t.Logf("manager did not exit within %s", managerShutdownBudget) + } + }) + + waitForHealthz(t, restURL, healthzReadyTimeout) + + return &Stack{ + Env: env, + RestURL: restURL, + Manager: mgr, + Satellite: sat, + } +} + +// buildIntegrationManager constructs the controller-runtime manager +// the test stack drives. LeaderElection off (every replica is +// authoritative in tests), metrics disabled (no need for a Prometheus +// listener in unit-of-integration tests). +func buildIntegrationManager(env *Env) (manager.Manager, error) { + scheme := clientgoscheme.Scheme + utilruntime.Must(blockstoriov1alpha1.AddToScheme(scheme)) + + mgr, err := ctrl.NewManager(env.Cfg, ctrl.Options{ + Scheme: scheme, + Metrics: metricsserver.Options{BindAddress: "0"}, + HealthProbeBindAddress: "0", + LeaderElection: false, + }) + if err != nil { + return nil, fmt.Errorf("new manager: %w", err) + } + + err = mgr.AddHealthzCheck("healthz", healthz.Ping) + if err != nil { + return nil, fmt.Errorf("add healthz: %w", err) + } + + err = mgr.AddReadyzCheck("readyz", healthz.Ping) + if err != nil { + return nil, fmt.Errorf("add readyz: %w", err) + } + + return mgr, nil +} + +// wireReconcilers attaches every reconciler / runnable +// cmd/controller/main.go registers. Kept in lockstep with the +// production main so a new reconciler landing on `main` is one +// edit on each side — not two diff hunks lost in code review. +// +// Split into per-domain helpers (node / storage / resource / +// background) so each one stays under the funlen budget; the +// production main groups them by convention rather than by helper. +func wireReconcilers(mgr manager.Manager, store *storek8s.Store) error { + err := wireNodeReconcilers(mgr, store) + if err != nil { + return err + } + + err = wireStoragePoolReconciler(mgr) + if err != nil { + return err + } + + err = wireResourceGroupReconcilers(mgr, store) + if err != nil { + return err + } + + err = wireResourceReconcilers(mgr, store) + if err != nil { + return err + } + + return wireBackgroundRunnables(mgr) +} + +func wireNodeReconcilers(mgr manager.Manager, store *storek8s.Store) error { + err := (&controller.NodeReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Store: store, + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("node: %w", err) + } + + err = (&controller.NodeHeartbeatReconciler{Client: mgr.GetClient()}).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("node-heartbeat: %w", err) + } + + err = (&controller.NodeLabelSyncReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("node-label-sync: %w", err) + } + + return nil +} + +func wireStoragePoolReconciler(mgr manager.Manager) error { + err := (&controller.StoragePoolReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("storagepool: %w", err) + } + + return nil +} + +func wireResourceGroupReconcilers(mgr manager.Manager, store *storek8s.Store) error { + err := (&controller.ResourceGroupReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Store: store, + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("resourcegroup: %w", err) + } + + err = (&controller.RGRebalanceReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Store: store, + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("rg-rebalance: %w", err) + } + + return nil +} + +func wireResourceReconcilers(mgr manager.Manager, store *storek8s.Store) error { + err := (&controller.ResourceDefinitionReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Store: store, + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("resourcedefinition: %w", err) + } + + err = (&controller.ResourceReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Store: store, + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("resource: %w", err) + } + + err = (&controller.ResourceMigrationReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("resource-migration: %w", err) + } + + err = (&controller.SnapshotReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr) + if err != nil { + return fmt.Errorf("snapshot: %w", err) + } + + return nil +} + +func wireBackgroundRunnables(mgr manager.Manager) error { + err := (&controller.AutoSnapshotRunnable{Client: mgr.GetClient()}).RegisterWithManager(mgr) + if err != nil { + return fmt.Errorf("auto-snapshot: %w", err) + } + + err = (&controller.AutoEvictReconciler{Client: mgr.GetClient()}).RegisterWithManager(mgr) + if err != nil { + return fmt.Errorf("auto-evict: %w", err) + } + + return nil +} + +// mountREST picks a free 127.0.0.1 port, hands it to the +// pkg/rest.Server, and returns the resulting http://… URL the +// linstor CLI consumes via --controllers. The free-port-then-bind +// race is documented + accepted (TOCTOU window is microseconds and +// the listener immediately re-binds). +func mountREST(mgr manager.Manager, store *storek8s.Store) (string, error) { + addr, err := pickFreePort() + if err != nil { + return "", fmt.Errorf("pick free port: %w", err) + } + + err = mgr.Add(&rest.Server{ + Addr: addr, + Store: store, + Client: mgr.GetClient(), + Namespace: restNamespace, + }) + if err != nil { + return "", fmt.Errorf("add REST: %w", err) + } + + return "http://" + addr, nil +} + +// pickFreePort asks the kernel for an unused 127.0.0.1 port, +// closes the listener, and returns the host:port string. There +// is a tiny race between Close + the REST server's Listen — in +// practice this is fine for in-process tests; the port is +// exclusively local and nothing else races us in the test process. +func pickFreePort() (string, error) { + listenCfg := net.ListenConfig{} + + listener, err := listenCfg.Listen(context.Background(), "tcp", "127.0.0.1:0") + if err != nil { + return "", fmt.Errorf("listen: %w", err) + } + + addr := listener.Addr().String() + + err = listener.Close() + if err != nil { + return "", fmt.Errorf("close probe listener: %w", err) + } + + return addr, nil +} + +// waitForHealthz polls /v1/healthz until it answers 204 or the +// deadline elapses. The REST server starts as a manager runnable, +// so the listener is up shortly after mgr.Start; this poll keeps +// the test deterministic without a hard sleep. +func waitForHealthz(t *testing.T, baseURL string, timeout time.Duration) { + t.Helper() + + deadline := time.Now().Add(timeout) + cli := &http.Client{Timeout: time.Second} + + for { + err := pingHealthz(cli, baseURL) + if err == nil { + return + } + + if time.Now().After(deadline) { + t.Fatalf("REST /v1/healthz did not answer within %s (last err=%v)", timeout, err) + } + + time.Sleep(healthzPollInterval) + } +} + +// pingHealthz issues one GET /v1/healthz request and returns nil +// when the server answered 200 or 204. Extracted from +// waitForHealthz so the polling loop stays readable and so the +// noctx linter's "use Do(*http.Request)" guidance applies in one +// place. +func pingHealthz(cli *http.Client, baseURL string) error { + req, err := http.NewRequestWithContext(context.Background(), + http.MethodGet, baseURL+"/v1/healthz", http.NoBody) + if err != nil { + return fmt.Errorf("build request: %w", err) + } + + resp, err := cli.Do(req) + if err != nil { + return fmt.Errorf("get healthz: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusOK { + return nil + } + + return fmt.Errorf("%w: status %d", errHealthzUnready, resp.StatusCode) +} diff --git a/tests/integration/harness/satellite.go b/tests/integration/harness/satellite.go new file mode 100644 index 0000000..f405cd6 --- /dev/null +++ b/tests/integration/harness/satellite.go @@ -0,0 +1,367 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package harness + +import ( + "context" + "strings" + "sync" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" +) + +const ( + // satelliteTickInterval governs how often the satellite mock + // scans CRDs. Short enough that Eventually(30s) loops never + // wait long; long enough not to thrash the apiserver in slow + // CI. + satelliteTickInterval = 200 * time.Millisecond + + // defaultPoolTotalKiB is the fixture's stand-in pool capacity + // when the test didn't pre-stamp Status.TotalCapacity. 10 GiB + // is generous enough that placer logic that rejects undersized + // pools (Bug 35 guards) doesn't trip on the smoke. + defaultPoolTotalKiB int64 = 10 * 1024 * 1024 // 10 GiB + + // drbdStateUpToDate is the steady-state DRBD state for a + // healthy diskful replica. + drbdStateUpToDate = "UpToDate" + + // providerLVMThinUpper / providerZFSThinUpper are the upstream + // LINSTOR enum values for thin pools. Hoisted to consts so + // goconst doesn't flag the same string in the helpers and in + // fixtures.go. + providerLVMThinUpper = "LVM_THIN" + providerZFSThinUpper = "ZFS_THIN" +) + +// Satellite is the in-process mock of the per-node satellite +// reconcilers. It tail-polls the apiserver and writes Status fields +// onto Node / StoragePool / Resource / Snapshot CRDs the way a +// healthy 3-node cluster would: nodes go Ready, pools report +// FreeCapacity, resources reach DrbdState=UpToDate after a tick. +// +// The full satellite stack (pkg/satellite/controllers) drives a +// real DRBD state machine through FakeExec — far more than Phase 0 +// needs. We deliberately re-implement only the externally observable +// "healthy steady state" projection here. Phase 1 tests can extend +// via the simulation knobs (SimulatePoolMissing, SimulateDRBDState, +// FailNext) without touching the rest of the harness. +// +// Implements manager.Runnable so it shuts down with the manager. +type Satellite struct { + client client.Client + + mu sync.Mutex + + // poolMissing[node][pool] true → satellite stamps Status.PoolMissing. + poolMissing map[string]map[string]bool + + // drbdState[rdName][node] → forced DrbdState for that replica. + drbdState map[string]map[string]string + + // failNext is the queue of pending FakeExec-style failures the + // satellite mock is asked to inject the next time it sees a + // matching command. Phase 0 stores them but does not consult + // them — the simulator never shells out. Phase 1 tests will + // plumb this through their own group-specific helpers. + failNext []string + + // tickInterval governs how often the satellite scans CRDs. + tickInterval time.Duration +} + +// NewSatellite returns a Satellite ready to be Add'd to a manager. +func NewSatellite(cli client.Client) *Satellite { + return &Satellite{ + client: cli, + poolMissing: map[string]map[string]bool{}, + drbdState: map[string]map[string]string{}, + tickInterval: satelliteTickInterval, + } +} + +// NeedLeaderElection: tests don't elect leaders, but interface +// compliance keeps the manager happy if we ever flip the flag. +func (*Satellite) NeedLeaderElection() bool { return false } + +// Start is the manager.Runnable entrypoint. Returns when ctx is +// cancelled. +func (s *Satellite) Start(ctx context.Context) error { + ticker := time.NewTicker(s.tickInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + s.tickOnce(ctx) + } + } +} + +// SimulatePoolMissing toggles PoolMissing=true on the next tick for +// the named (node, pool) tuple — surfaces the `Faulty` shape on +// `linstor sp l` (Bug 83 guard). +func (s *Satellite) SimulatePoolMissing(node, pool string) { + s.mu.Lock() + defer s.mu.Unlock() + + if s.poolMissing[node] == nil { + s.poolMissing[node] = map[string]bool{} + } + + s.poolMissing[node][pool] = true +} + +// SimulateDRBDState pins the DrbdState the satellite will stamp on +// (rdName, node). Use to force `SyncTarget`, `Outdated`, `Failed`, +// etc. for tests that need a non-healthy resource. +func (s *Satellite) SimulateDRBDState(rdName, node, state string) { + s.mu.Lock() + defer s.mu.Unlock() + + if s.drbdState[rdName] == nil { + s.drbdState[rdName] = map[string]string{} + } + + s.drbdState[rdName][node] = state +} + +// FailNext enqueues a FakeExec-style command match that the next +// satellite-side shell-out should fail with. Phase 0 stores it but +// does not enforce — the mock never shells out. The slot exists so +// Phase 1 Group F tests (e.g. Bug 83 reproduction) can declare the +// intent now and the harness can be extended without an API churn. +func (s *Satellite) FailNext(cmdline string) { + s.mu.Lock() + defer s.mu.Unlock() + + s.failNext = append(s.failNext, cmdline) +} + +// tickOnce reconciles each CRD kind the mock cares about. Errors +// are swallowed (the test will time out via Eventually if the mock +// can't make progress) — we deliberately don't log per-tick errors +// because envtest's apiserver routinely returns "object has been +// modified" during reconcile contention and the noise drowns real +// failures. +func (s *Satellite) tickOnce(ctx context.Context) { + s.reconcileNodes(ctx) + s.reconcileStoragePools(ctx) + s.reconcileResources(ctx) +} + +// reconcileNodes stamps Conditions[Ready]=True and ConnectionStatus +// ONLINE on every Node — the steady-state shape a satellite produces +// after its first heartbeat. Idempotent: only writes when the value +// actually changes. +func (s *Satellite) reconcileNodes(ctx context.Context) { + var nodes blockstoriov1alpha1.NodeList + + err := s.client.List(ctx, &nodes) + if err != nil { + return + } + + for i := range nodes.Items { + node := &nodes.Items[i] + desiredStatus := blockstoriov1alpha1.NodeConnectionStatusOnline + + if node.Status.ConnectionStatus == desiredStatus && hasReadyTrue(node.Status.Conditions) { + continue + } + + patched := node.DeepCopy() + patched.Status.ConnectionStatus = desiredStatus + patched.Status.LastHeartbeatTime = ptrNow() + patched.Status.Conditions = upsertCondition(patched.Status.Conditions, &metav1.Condition{ + Type: blockstoriov1alpha1.NodeConditionReady, + Status: metav1.ConditionTrue, + Reason: "SatelliteMockHealthy", + Message: "harness/satellite.go stamped Ready", + LastTransitionTime: metav1.Now(), + }) + + _ = s.client.Status().Update(ctx, patched) + } +} + +// reconcileStoragePools writes FreeCapacity from the SP's +// TotalCapacity (or the fixture default) and toggles PoolMissing +// based on the simulation map. Mirrors what a real satellite emits +// once `lvm vgs` / `zfs list` succeed. +func (s *Satellite) reconcileStoragePools(ctx context.Context) { + var pools blockstoriov1alpha1.StoragePoolList + + err := s.client.List(ctx, &pools) + if err != nil { + return + } + + for i := range pools.Items { + pool := &pools.Items[i] + want := s.desiredPoolStatus(pool) + + if poolStatusEqual(pool.Status, want) { + continue + } + + patched := pool.DeepCopy() + patched.Status = want + _ = s.client.Status().Update(ctx, patched) + } +} + +// reconcileResources advances Resource.Status.DrbdState to UpToDate +// (or the simulator-overridden value) and stamps per-volume DiskState. +// Connections are left empty in Phase 0 — Phase 1 group I will fill +// them in as needed. +func (s *Satellite) reconcileResources(ctx context.Context) { + var resources blockstoriov1alpha1.ResourceList + + err := s.client.List(ctx, &resources) + if err != nil { + return + } + + for i := range resources.Items { + resource := &resources.Items[i] + + want := s.desiredDrbdState(resource) + if resource.Status.DrbdState == want { + continue + } + + patched := resource.DeepCopy() + patched.Status.DrbdState = want + + // Volumes follow the resource: the satellite reports + // UpToDate / Diskless on a per-volume DiskState. We + // project the resource-level value uniformly — sufficient + // for the smoke test and Phase 0. + for j := range patched.Status.Volumes { + patched.Status.Volumes[j].DiskState = want + } + + _ = s.client.Status().Update(ctx, patched) + } +} + +// desiredPoolStatus computes the status we want stamped on a +// StoragePool. FreeCapacity defaults to defaultPoolTotalKiB if the +// fixture didn't set TotalCapacity. +func (s *Satellite) desiredPoolStatus(pool *blockstoriov1alpha1.StoragePool) blockstoriov1alpha1.StoragePoolStatus { + total := pool.Status.TotalCapacity + if total == 0 { + total = defaultPoolTotalKiB + } + + free := pool.Status.FreeCapacity + if free == 0 { + free = total + } + + missing := s.isPoolMissing(pool.Spec.NodeName, pool.Spec.PoolName) + + return blockstoriov1alpha1.StoragePoolStatus{ + FreeCapacity: free, + TotalCapacity: total, + SupportsSnapshots: providerSupportsSnapshots(pool.Spec.ProviderKind), + PoolMissing: missing, + StaticTraits: map[string]string{"kind": pool.Spec.ProviderKind}, + } +} + +// desiredDrbdState returns the configured override for this +// (rd, node) pair or UpToDate as the healthy default. +func (s *Satellite) desiredDrbdState(resource *blockstoriov1alpha1.Resource) string { + s.mu.Lock() + defer s.mu.Unlock() + + perNode, ok := s.drbdState[resource.Spec.ResourceDefinitionName] + if ok { + state, ok := perNode[resource.Spec.NodeName] + if ok { + return state + } + } + + return drbdStateUpToDate +} + +func (s *Satellite) isPoolMissing(node, pool string) bool { + s.mu.Lock() + defer s.mu.Unlock() + + return s.poolMissing[node][pool] +} + +// hasReadyTrue is a tiny helper kept off the global namespace so +// internal callers don't accidentally use it as a public assertion. +func hasReadyTrue(conds []metav1.Condition) bool { + for i := range conds { + if conds[i].Type == blockstoriov1alpha1.NodeConditionReady { + return conds[i].Status == metav1.ConditionTrue + } + } + + return false +} + +func upsertCondition(conds []metav1.Condition, cond *metav1.Condition) []metav1.Condition { + for i := range conds { + if conds[i].Type == cond.Type { + if conds[i].Status != cond.Status { + conds[i] = *cond + } + + return conds + } + } + + return append(conds, *cond) +} + +func ptrNow() *metav1.Time { + now := metav1.Now() + + return &now +} + +func providerSupportsSnapshots(kind string) bool { + switch strings.ToUpper(kind) { + case "ZFS", providerZFSThinUpper, providerLVMThinUpper, "FILE_THIN": + return true + default: + return false + } +} + +func poolStatusEqual(actual, want blockstoriov1alpha1.StoragePoolStatus) bool { + return actual.FreeCapacity == want.FreeCapacity && + actual.TotalCapacity == want.TotalCapacity && + actual.PoolMissing == want.PoolMissing && + actual.SupportsSnapshots == want.SupportsSnapshots +} diff --git a/tests/integration/smoke_test.go b/tests/integration/smoke_test.go new file mode 100644 index 0000000..e50eb7c --- /dev/null +++ b/tests/integration/smoke_test.go @@ -0,0 +1,74 @@ +//go:build integration + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package integration is the Tier 2 test suite scaffold. Tests in +// this package use the `harness` sub-package to drive an envtest +// stack with every reconciler wired, then exercise the LINSTOR REST +// surface via the upstream `linstor` CLI binary. +// +// Phase 0 ships exactly one test (TestSmokeNodeList) that proves +// envtest + manager + REST + CLI end-to-end. Phase 1 agents add +// per-group files (group__test.go). +package integration + +import ( + "sort" + "testing" + + "github.com/cozystack/blockstor/tests/integration/harness" +) + +// TestSmokeNodeList is the canonical Phase 0 smoke. It validates +// every harness component in one round-trip: +// +// - envtest + manager bootstrap (harness.StartStack) +// - fixture seeding (harness.SeedThreeNodeCluster) +// - REST server is reachable on the picked port +// - the upstream `linstor` CLI can parse our /v1/nodes response +// - the response contains exactly the three fixture nodes +// +// If this passes, Phase 1 group agents can extend the suite with +// confidence that the scaffold is functional. +func TestSmokeNodeList(t *testing.T) { + stack := harness.StartStack(t) + harness.SeedThreeNodeCluster(t, stack) + + cli := &harness.CLI{URL: stack.RestURL} + + out := cli.JSON(t, "node", "list") + + names := make([]string, 0, len(out)) + for _, row := range out { + if n, ok := row["name"].(string); ok { + names = append(names, n) + } + } + + sort.Strings(names) + + want := []string{"worker-1", "worker-2", "worker-3"} + if len(names) != len(want) { + t.Fatalf("expected %d nodes, got %d (%v)", len(want), len(names), names) + } + + for i := range want { + if names[i] != want[i] { + t.Fatalf("node[%d] = %q, want %q (full list: %v)", i, names[i], want[i], names) + } + } +}