From 8c335e9f756905b0de6cbecb1fbe42040234afb6 Mon Sep 17 00:00:00 2001
From: Andrey Kolkov <androndo@gmail.com>
Date: Thu, 4 Jun 2026 12:54:39 +0400
Subject: [PATCH] feat(main): add e2e with kamaji case

---
 .github/workflows/ci.yml                     |   5 +-
 .github/workflows/e2e.yml                    |  48 +++
 .github/workflows/publish.yml                |   4 +-
 Makefile                                     |   8 +
 cmd/kubectl-etcd/helpers_test.go             | 252 ++++++++++++++
 config/default/manager_auth_proxy_patch.yaml |   5 +-
 controllers/etcdcluster_controller.go        |  13 +-
 controllers/etcdcluster_controller_test.go   |  63 ++++
 hack/e2e.sh                                  | 117 +++++++
 test/e2e/kamaji_datastore_test.go            | 346 +++++++++++++++++++
 test/e2e/suite_test.go                       | 213 ++++++++++++
 test/e2e/testdata/00-namespace.yaml          |   4 +
 test/e2e/testdata/01-pki.yaml                |  59 ++++
 test/e2e/testdata/02-etcdcluster.yaml        |  24 ++
 test/e2e/testdata/03-datastore.yaml          |  35 ++
 test/e2e/testdata/04-tenantcontrolplane.yaml |  22 ++
 16 files changed, 1213 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/e2e.yml
 create mode 100644 cmd/kubectl-etcd/helpers_test.go
 create mode 100755 hack/e2e.sh
 create mode 100644 test/e2e/kamaji_datastore_test.go
 create mode 100644 test/e2e/suite_test.go
 create mode 100644 test/e2e/testdata/00-namespace.yaml
 create mode 100644 test/e2e/testdata/01-pki.yaml
 create mode 100644 test/e2e/testdata/02-etcdcluster.yaml
 create mode 100644 test/e2e/testdata/03-datastore.yaml
 create mode 100644 test/e2e/testdata/04-tenantcontrolplane.yaml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7b50150e..b097c3e7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,10 +1,11 @@
 name: CI
 
 on:
+  # No PR branch filter: PRs here target main and integration branches (e.g.
+  # v1) alike; the old `master` filter matched nothing, so CI never ran on PRs.
   pull_request:
-    branches: [ master ]
   push:
-    branches: [ master ]
+    branches: [ main ]
 
 jobs:
   verify:
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 00000000..51c25b52
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,48 @@
+name: E2E
+
+on:
+  # No PR branch filter: PRs in this repo target main as well as integration
+  # branches (e.g. v1), and the suite must gate all of them. A filter naming
+  # a branch PRs never target (this said `master`, which does not exist) means
+  # the workflow never runs at all.
+  #
+  # paths-ignore: a kind+cert-manager+Kamaji provisioning run costs ~30-45
+  # minutes; skip it for PRs that touch nothing the suite exercises
+  # (docs-only changes). Everything else — Go code, manifests, the harness,
+  # the workflows themselves — still gates.
+  pull_request:
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+  push:
+    tags: [ "v*" ]
+  workflow_dispatch:
+
+concurrency:
+  group: e2e-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  kamaji-datastore:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+          cache: true
+
+      - name: e2e (kind + cert-manager + Kamaji)
+        # Provisions an ephemeral kind cluster, installs cert-manager and
+        # Kamaji at the versions pinned in hack/e2e.sh, builds the operator
+        # from this checkout and runs test/e2e — the Kamaji DataStore
+        # consumer scenario.
+        #
+        # On failure, hack/e2e.sh's EXIT trap dumps the cluster state
+        # (CRs, pods, operator and Kamaji logs) into this step's output
+        # BEFORE tearing the kind cluster down. Don't add a separate
+        # dump step here: by the time it would run, the trap has already
+        # deleted the cluster and every kubectl call would come up empty.
+        run: make e2e
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 54e0bc6e..01addceb 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -2,7 +2,9 @@ name: Build and Publish Docker Image
 
 on:
   push:
-    branches: [ master ]
+    # The default branch is main; the old `master` filter matched nothing,
+    # so images were never published.
+    branches: [ main ]
 
 jobs:
   build-and-publish:
diff --git a/Makefile b/Makefile
index 49e76dbf..c6a45a40 100644
--- a/Makefile
+++ b/Makefile
@@ -61,6 +61,14 @@ vet: ## Run go vet against code.
 test: manifests generate fmt vet envtest ## Run tests.
 	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out
 
+.PHONY: test-e2e
+test-e2e: ## Run the e2e suite against the current kubeconfig context (expects cert-manager, Kamaji and the operator installed; see hack/e2e.sh).
+	go test -tags e2e -count=1 ./test/e2e/ -v -timeout 30m
+
+.PHONY: e2e
+e2e: ## Provision a kind cluster with cert-manager and Kamaji, deploy the operator, run the e2e suite. KEEP_CLUSTER=1 keeps the cluster for debugging.
+	hack/e2e.sh
+
 ##@ Build
 
 .PHONY: build
diff --git a/cmd/kubectl-etcd/helpers_test.go b/cmd/kubectl-etcd/helpers_test.go
new file mode 100644
index 00000000..60739b01
--- /dev/null
+++ b/cmd/kubectl-etcd/helpers_test.go
@@ -0,0 +1,252 @@
+package main
+
+import (
+	"crypto/ecdsa"
+	"crypto/elliptic"
+	"crypto/rand"
+	"crypto/tls"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"encoding/pem"
+	"errors"
+	"fmt"
+	"math/big"
+	"strings"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// ── Blocker #2: a failed/never-ready port-forward must not hang ──────────────
+
+func TestAwaitForward_Ready(t *testing.T) {
+	ready := make(chan struct{}, 1)
+	close(ready)
+	if err := awaitForward(ready, make(chan error, 1), make(chan struct{}, 1), time.Second); err != nil {
+		t.Fatalf("ready forward should succeed, got %v", err)
+	}
+}
+
+func TestAwaitForward_ErrorBeforeReady(t *testing.T) {
+	// ForwardPorts returns an error without ever closing readyChan — the old
+	// code blocked on <-readyChan forever. awaitForward must return the error.
+	forwardErr := make(chan error, 1)
+	forwardErr <- fmt.Errorf("dial tcp: connection refused")
+	err := awaitForward(make(chan struct{}), forwardErr, make(chan struct{}, 1), time.Second)
+	if err == nil {
+		t.Fatal("a forward failure must return an error, not hang or succeed")
+	}
+	if !strings.Contains(err.Error(), "connection refused") {
+		t.Errorf("error should wrap the forward failure, got %v", err)
+	}
+}
+
+func TestAwaitForward_Timeout(t *testing.T) {
+	stop := make(chan struct{}, 1)
+	// Nothing ever signals ready and no error arrives → must time out, not hang.
+	err := awaitForward(make(chan struct{}), make(chan error, 1), stop, 10*time.Millisecond)
+	if err == nil {
+		t.Fatal("a never-ready forward must time out, not hang")
+	}
+	select {
+	case <-stop:
+	default:
+		t.Error("timeout must close stopChan to tear the forwarder down")
+	}
+}
+
+// ── Blocker #3: TLS discovery and credential loading ────────────────────────
+
+// etcdTLSPod returns a Pod whose etcd container points --trusted-ca-file at a
+// secret-backed volume mount, mirroring what the operator builds.
+func etcdTLSPod() (*corev1.Pod, corev1.Container) {
+	c := corev1.Container{
+		Name:         "etcd",
+		Command:      []string{"etcd"},
+		Args:         []string{"--trusted-ca-file=/etc/etcd/pki/ca/ca.crt"},
+		VolumeMounts: []corev1.VolumeMount{{Name: "ca", MountPath: "/etc/etcd/pki/ca"}},
+	}
+	pod := &corev1.Pod{
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{c},
+			Volumes: []corev1.Volume{{
+				Name:         "ca",
+				VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: "etcd-ca"}},
+			}},
+		},
+	}
+	return pod, c
+}
+
+func TestFindSecretNameForTLS_Happy(t *testing.T) {
+	pod, c := etcdTLSPod()
+	name, err := findSecretNameForTLS(pod, c)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if name != "etcd-ca" {
+		t.Errorf("secret name = %q, want %q", name, "etcd-ca")
+	}
+}
+
+func TestFindSecretNameForTLS_NoCAFlag(t *testing.T) {
+	c := corev1.Container{Name: "etcd", Args: []string{"--listen-client-urls=http://0.0.0.0:2379"}}
+	pod := &corev1.Pod{Spec: corev1.PodSpec{Containers: []corev1.Container{c}}}
+	_, err := findSecretNameForTLS(pod, c)
+	if !errors.Is(err, errNoTrustedCAFile) {
+		t.Errorf("expected errNoTrustedCAFile sentinel, got %v", err)
+	}
+}
+
+func TestFindSecretNameForTLS_MountNotFound(t *testing.T) {
+	c := corev1.Container{Name: "etcd", Args: []string{"--trusted-ca-file=/nowhere/ca.crt"}}
+	pod := &corev1.Pod{Spec: corev1.PodSpec{Containers: []corev1.Container{c}}}
+	if _, err := findSecretNameForTLS(pod, c); err == nil {
+		t.Error("expected an error when no volume backs the CA path")
+	}
+}
+
+func TestLoadCredentials_Happy(t *testing.T) {
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "ns1"},
+		Data: map[string][]byte{
+			corev1.BasicAuthUsernameKey: []byte("root"),
+			corev1.BasicAuthPasswordKey: []byte("s3cret"),
+		},
+	})
+	u, p, err := loadCredentials(cs, "ns1", "creds")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if u != "root" || p != "s3cret" {
+		t.Errorf("got %q/%q, want root/s3cret", u, p)
+	}
+}
+
+func TestLoadCredentials_DefaultsUsernameToRoot(t *testing.T) {
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "ns1"},
+		Data:       map[string][]byte{corev1.BasicAuthPasswordKey: []byte("p")},
+	})
+	u, _, err := loadCredentials(cs, "ns1", "creds")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if u != "root" {
+		t.Errorf("username default = %q, want root", u)
+	}
+}
+
+func TestLoadCredentials_MissingPassword(t *testing.T) {
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "ns1"},
+		Data:       map[string][]byte{corev1.BasicAuthUsernameKey: []byte("root")},
+	})
+	if _, _, err := loadCredentials(cs, "ns1", "creds"); err == nil {
+		t.Error("expected an error when the password key is missing/empty")
+	}
+}
+
+func TestLoadCredentials_NamespacedRef(t *testing.T) {
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "other"},
+		Data:       map[string][]byte{corev1.BasicAuthPasswordKey: []byte("p")},
+	})
+	// Default namespace is ns1, but the "other/creds" ref must override it.
+	_, p, err := loadCredentials(cs, "ns1", "other/creds")
+	if err != nil || p != "p" {
+		t.Errorf("namespace/name ref not honored: p=%q err=%v", p, err)
+	}
+}
+
+func TestExtractTLSFiles_Happy(t *testing.T) {
+	cert, key := selfSignedPEM(t)
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "etcd-ca", Namespace: "ns1"},
+		Data:       map[string][]byte{"ca.crt": cert, "tls.crt": cert, "tls.key": key},
+	})
+	pool, clientCert, err := extractTLSFiles(cs, "ns1", "etcd-ca")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if pool == nil || clientCert == nil {
+		t.Fatal("expected a non-nil CA pool and client certificate")
+	}
+}
+
+func TestExtractTLSFiles_MissingCA(t *testing.T) {
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "etcd-ca", Namespace: "ns1"},
+		Data:       map[string][]byte{"tls.crt": []byte("x"), "tls.key": []byte("y")},
+	})
+	if _, _, err := extractTLSFiles(cs, "ns1", "etcd-ca"); err == nil {
+		t.Error("expected an error when ca.crt is absent from the secret")
+	}
+}
+
+func TestGetTLSConfig_Plaintext(t *testing.T) {
+	c := corev1.Container{Name: "etcd", Args: []string{"--listen-client-urls=http://0.0.0.0:2379"}}
+	pod := &corev1.Pod{Spec: corev1.PodSpec{Containers: []corev1.Container{c}}}
+	cfg, err := getTLSConfig(fake.NewSimpleClientset(), pod, "ns1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if cfg != nil {
+		t.Errorf("a plaintext cluster should yield a nil TLS config, got %+v", cfg)
+	}
+}
+
+func TestGetTLSConfig_TLS(t *testing.T) {
+	cert, key := selfSignedPEM(t)
+	pod, _ := etcdTLSPod()
+	cs := fake.NewSimpleClientset(&corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{Name: "etcd-ca", Namespace: "ns1"},
+		Data:       map[string][]byte{"ca.crt": cert, "tls.crt": cert, "tls.key": key},
+	})
+	cfg, err := getTLSConfig(cs, pod, "ns1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if cfg == nil {
+		t.Fatal("expected a TLS config for a TLS-enabled cluster")
+	}
+	if cfg.MinVersion != tls.VersionTLS12 {
+		t.Errorf("MinVersion = %d, want TLS 1.2 (%d)", cfg.MinVersion, tls.VersionTLS12)
+	}
+	if cfg.RootCAs == nil || len(cfg.Certificates) != 1 {
+		t.Error("expected a populated RootCAs pool and exactly one client certificate")
+	}
+}
+
+// selfSignedPEM returns a fresh self-signed certificate and its EC private key,
+// PEM-encoded — enough to satisfy x509 parsing and tls.X509KeyPair in tests.
+func selfSignedPEM(t *testing.T) (certPEM, keyPEM []byte) {
+	t.Helper()
+	priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
+	if err != nil {
+		t.Fatalf("generate key: %v", err)
+	}
+	tmpl := &x509.Certificate{
+		SerialNumber:          big.NewInt(1),
+		Subject:               pkix.Name{CommonName: "test-etcd"},
+		NotBefore:             time.Now().Add(-time.Hour),
+		NotAfter:              time.Now().Add(time.Hour),
+		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
+		BasicConstraintsValid: true,
+		IsCA:                  true,
+	}
+	der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &priv.PublicKey, priv)
+	if err != nil {
+		t.Fatalf("create certificate: %v", err)
+	}
+	certPEM = pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
+	keyDER, err := x509.MarshalECPrivateKey(priv)
+	if err != nil {
+		t.Fatalf("marshal key: %v", err)
+	}
+	keyPEM = pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})
+	return certPEM, keyPEM
+}
diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml
index b7512661..c5cb56ab 100644
--- a/config/default/manager_auth_proxy_patch.yaml
+++ b/config/default/manager_auth_proxy_patch.yaml
@@ -31,7 +31,10 @@ spec:
           capabilities:
             drop:
               - "ALL"
-        image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
+        # gcr.io/kubebuilder/kube-rbac-proxy is gone (the gcr.io/kubebuilder
+        # registry shut down in early 2025); quay.io/brancz is the upstream
+        # home of kube-rbac-proxy that kubebuilder pointed users to.
+        image: quay.io/brancz/kube-rbac-proxy:v0.18.2
         args:
         - "--secure-listen-address=0.0.0.0:8443"
         - "--upstream=http://127.0.0.1:8080/"
diff --git a/controllers/etcdcluster_controller.go b/controllers/etcdcluster_controller.go
index 32a9d33d..18090d1d 100644
--- a/controllers/etcdcluster_controller.go
+++ b/controllers/etcdcluster_controller.go
@@ -452,8 +452,19 @@ func (r *EtcdClusterReconciler) bootstrap(
 		// Pod label on the first reconcile of the seed Pod, so the PDB
 		// selects it from creation rather than only after MemberList runs
 		// a few reconciles later.
+		//
+		// Written as a merge patch, not an Update: the member controller
+		// reconciles the seed the moment it is created and its own
+		// Status().Update races with this write — an optimistic-locked
+		// Update here loses that race with a conflict, and a cache-backed
+		// re-Get cannot recover (the informer may not even have the
+		// object yet). This stamp happens only in the create branch —
+		// losing it would leave the seed permanently IsVoter=false, and
+		// both learner memberID discovery and promotion filter their dial
+		// endpoints to voters, wedging every later scale-up.
+		stampBase := seed.DeepCopy()
 		seed.Status.IsVoter = true
-		if err := r.Status().Update(ctx, seed); err != nil {
+		if err := r.Status().Patch(ctx, seed, client.MergeFrom(stampBase)); err != nil {
 			return ctrl.Result{}, err
 		}
 	} else if !metav1.IsControlledBy(seed, cluster) {
diff --git a/controllers/etcdcluster_controller_test.go b/controllers/etcdcluster_controller_test.go
index a767e285..4f28f18b 100644
--- a/controllers/etcdcluster_controller_test.go
+++ b/controllers/etcdcluster_controller_test.go
@@ -34,6 +34,8 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+	"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
 
 	lll "github.com/cozystack/etcd-operator/api/v1alpha2"
 )
@@ -110,6 +112,67 @@ func TestBootstrap_CreatesSingleSeedMember(t *testing.T) {
 	if !seed.Spec.Bootstrap {
 		t.Fatalf("seed member must be marked Bootstrap=true")
 	}
+	// The seed is pre-stamped IsVoter=true at create time (it forms the
+	// cluster on its own — never a learner). Losing the stamp wedges every
+	// later scale-up: learner memberID discovery and promotion dial voters
+	// only.
+	if !seed.Status.IsVoter {
+		t.Fatalf("seed Status.IsVoter must be pre-stamped true at create; got false")
+	}
+}
+
+// TestBootstrap_SeedIsVoterStampSurvivesConcurrentStatusWriter pins the
+// merge-patch choice in the seed IsVoter pre-stamp
+// (etcdcluster_controller.go bootstrap). The member controller starts
+// reconciling the seed the moment Create returns, and its own
+// Status().Update bumps the resourceVersion concurrently with this stamp.
+// An optimistic-locked Status().Update here loses that race with a
+// Conflict, and — because the stamp lives only in the create branch — the
+// seed would stay IsVoter=false forever. The interceptor models the
+// concurrent writer by failing every optimistic-locked EtcdMember status
+// Update with a Conflict; the merge patch carries no resourceVersion and
+// must land regardless. This test fails against the previous
+// Status().Update implementation.
+func TestBootstrap_SeedIsVoterStampSurvivesConcurrentStatusWriter(t *testing.T) {
+	ctx := context.Background()
+	cluster := &lll.EtcdCluster{
+		ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "ns"},
+		Spec: lll.EtcdClusterSpec{
+			Replicas: ptrInt32(3),
+			Version:  "3.5.17",
+			Storage:  lll.StorageSpec{Size: quickQty(t, "1Gi")},
+		},
+	}
+	s := testScheme(t)
+	c := fake.NewClientBuilder().
+		WithScheme(s).
+		WithObjects(cluster).
+		WithStatusSubresource(&lll.EtcdCluster{}, &lll.EtcdMember{}, &lll.EtcdSnapshot{}).
+		WithInterceptorFuncs(interceptor.Funcs{
+			SubResourceUpdate: func(ctx context.Context, cl client.Client, sub string, obj client.Object, opts ...client.SubResourceUpdateOption) error {
+				if _, isMember := obj.(*lll.EtcdMember); isMember && sub == "status" {
+					return apierrors.NewConflict(
+						schema.GroupResource{Group: lll.GroupVersion.Group, Resource: "etcdmembers"},
+						obj.GetName(), errors.New("simulated concurrent status writer"))
+				}
+				return cl.SubResource(sub).Update(ctx, obj, opts...)
+			},
+		}).
+		Build()
+	r := &EtcdClusterReconciler{Client: c, Scheme: s, EtcdClientFactory: factoryReturning(newFakeEtcd(0xdeadbeef))}
+
+	reconcileUntilStable(t, r, c, "test", "ns", 8)
+
+	members := &lll.EtcdMemberList{}
+	if err := c.List(ctx, members, client.InNamespace("ns")); err != nil {
+		t.Fatalf("List: %v", err)
+	}
+	if len(members.Items) != 1 {
+		t.Fatalf("expected one seed member; got %d", len(members.Items))
+	}
+	if !members.Items[0].Status.IsVoter {
+		t.Fatal("seed IsVoter stamp lost under a concurrent status writer; the stamp must be a merge patch, not an optimistic-locked Update")
+	}
 }
 
 // TestObservedSpec_LocksMidFlight pins the locking pattern's correctness
diff --git a/hack/e2e.sh b/hack/e2e.sh
new file mode 100755
index 00000000..3ecca865
--- /dev/null
+++ b/hack/e2e.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# End-to-end harness: provisions a kind cluster with cert-manager and Kamaji,
+# builds and deploys the operator from the working tree, then runs the e2e
+# suite (test/e2e). Destroys the kind cluster on exit unless KEEP_CLUSTER=1.
+#
+# Requirements on the host: docker, kubectl, helm, go. kind is installed
+# into ./bin automatically when missing.
+set -euo pipefail
+
+# Always build/pull for the host architecture. A user-level
+# DOCKER_DEFAULT_PLATFORM=linux/amd64 (common on Apple Silicon for x86-only
+# tooling) would otherwise pull an emulated kind node whose control plane
+# never becomes healthy.
+unset DOCKER_DEFAULT_PLATFORM
+
+# ── Pinned component versions ────────────────────────────────────────────
+# v0.32.0+ required with Docker 29 ("failed to detect containerd
+# snapshotter" on `kind load` with older releases).
+KIND_VERSION=v0.32.0
+KIND_NODE_IMAGE=kindest/node:v1.34.0
+CERT_MANAGER_VERSION=v1.18.2
+KAMAJI_CHART_VERSION=1.0.0
+# The TenantControlPlane Kubernetes version lives in
+# test/e2e/testdata/04-tenantcontrolplane.yaml and must stay within the skew
+# supported by KAMAJI_CHART_VERSION.
+
+KIND_CLUSTER_NAME=${KIND_CLUSTER_NAME:-etcd-operator-e2e}
+IMG=${IMG:-etcd-operator:e2e}
+KEEP_CLUSTER=${KEEP_CLUSTER:-}
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT"
+LOCALBIN="$ROOT/bin"
+mkdir -p "$LOCALBIN"
+export PATH="$LOCALBIN:$PATH"
+
+if ! command -v kind >/dev/null 2>&1; then
+    echo "--- installing kind $KIND_VERSION into $LOCALBIN"
+    GOBIN="$LOCALBIN" go install sigs.k8s.io/kind@"$KIND_VERSION"
+fi
+
+# dump_diagnostics prints cluster state for post-mortem debugging. It MUST run
+# from the EXIT trap, before the kind cluster is deleted — a separate CI step
+# after this script cannot do it, because by the time the step runs the trap
+# has already torn the cluster down and every kubectl call would fail.
+dump_diagnostics() {
+    echo "--- e2e failed; dumping cluster state before teardown"
+    kubectl get etcdclusters,etcdmembers,pods,certificates,secrets -A || true
+    kubectl get datastores,tenantcontrolplanes -A || true
+    kubectl -n etcd-operator-system logs deploy/etcd-operator-controller-manager --tail=200 || true
+    kubectl -n kamaji-system logs deploy/kamaji --tail=200 || true
+    # The tenant namespace is where the longest wait (TenantControlPlane
+    # Ready) fails — dump every pod's logs there, or the one failure mode
+    # the suite is most likely to hit leaves no trace.
+    for p in $(kubectl -n kamaji-e2e get pods -o name 2>/dev/null); do
+        echo "--- logs: kamaji-e2e/$p"
+        kubectl -n kamaji-e2e logs "$p" --all-containers --tail=100 || true
+    done
+}
+
+cleanup() {
+    status=$?
+    if [ "$status" -ne 0 ]; then
+        dump_diagnostics
+    fi
+    if [ -n "$KEEP_CLUSTER" ]; then
+        echo "--- KEEP_CLUSTER set; kind cluster '$KIND_CLUSTER_NAME' left running"
+        return
+    fi
+    echo "--- deleting kind cluster '$KIND_CLUSTER_NAME'"
+    kind delete cluster --name "$KIND_CLUSTER_NAME" >/dev/null 2>&1 || true
+}
+trap cleanup EXIT
+
+echo "--- creating kind cluster '$KIND_CLUSTER_NAME' ($KIND_NODE_IMAGE)"
+kind create cluster --name "$KIND_CLUSTER_NAME" --image "$KIND_NODE_IMAGE" --wait 5m
+kubectl config use-context "kind-$KIND_CLUSTER_NAME"
+
+echo "--- installing cert-manager $CERT_MANAGER_VERSION"
+kubectl apply -f "https://github.com/cert-manager/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml"
+kubectl -n cert-manager wait deploy --all --for=condition=Available --timeout=5m
+
+echo "--- installing Kamaji (chart $KAMAJI_CHART_VERSION)"
+# etcd.deploy=false / datastore.enabled=false: Kamaji's bundled kamaji-etcd
+# stays out — the DataStore under test is the operator-managed cluster the
+# suite creates (test/e2e/testdata/03-datastore.yaml), whose name the
+# manager receives as its default via datastore.nameOverride.
+helm repo add clastix https://clastix.github.io/charts --force-update >/dev/null
+# The kamaji manager exits at startup while its default DataStore is
+# missing, and the DataStore cannot be created later because the chart's
+# validating webhook (failurePolicy=Fail) is served by that same crashing
+# pod. Break the cycle the way the chart itself does for its bundled
+# datastore (a pre-install hook): install the CRDs first and create the
+# DataStore before the webhook configuration exists. The suite re-applies
+# the same fixture later, which is a no-op.
+helm show crds clastix/kamaji --version "$KAMAJI_CHART_VERSION" | kubectl apply -f -
+kubectl apply -f test/e2e/testdata/03-datastore.yaml
+helm upgrade --install kamaji clastix/kamaji \
+    --version "$KAMAJI_CHART_VERSION" \
+    --namespace kamaji-system --create-namespace \
+    --set etcd.deploy=false \
+    --set datastore.enabled=false \
+    --set datastore.nameOverride=kamaji-e2e \
+    --wait --timeout 5m
+# NOTE: the literal "kamaji-e2e" above must stay in sync with its two other
+# copies: e2eNamespace in test/e2e/kamaji_datastore_test.go and the
+# metadata.namespace/DataStore name in test/e2e/testdata/*.yaml.
+
+echo "--- building and deploying the operator ($IMG)"
+docker build -t "$IMG" .
+kind load docker-image "$IMG" --name "$KIND_CLUSTER_NAME"
+make install deploy IMG="$IMG"
+kubectl -n etcd-operator-system wait deploy/etcd-operator-controller-manager \
+    --for=condition=Available --timeout=5m
+
+echo "--- running e2e suite"
+make test-e2e
diff --git a/test/e2e/kamaji_datastore_test.go b/test/e2e/kamaji_datastore_test.go
new file mode 100644
index 00000000..81a0d5dd
--- /dev/null
+++ b/test/e2e/kamaji_datastore_test.go
@@ -0,0 +1,346 @@
+//go:build e2e
+
+package e2e
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"net"
+	"net/http"
+	"net/url"
+	"strings"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/client-go/tools/portforward"
+	"k8s.io/client-go/tools/remotecommand"
+	"k8s.io/client-go/transport/spdy"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	etcdv1alpha2 "github.com/cozystack/etcd-operator/api/v1alpha2"
+)
+
+const (
+	// e2eNamespace must stay in sync with two other copies of the literal:
+	// the fixtures (test/e2e/testdata/*.yaml, metadata.namespace) and the
+	// harness (hack/e2e.sh: `datastore.nameOverride=kamaji-e2e` and the
+	// diagnostics dump). Changing one without the others breaks the suite
+	// in confusing ways (Kamaji's manager looks up the DataStore by name).
+	e2eNamespace = "kamaji-e2e"
+	clusterName  = "etcd"
+	tcpName      = "tenant1"
+	proofName    = "e2e-proof" // ConfigMap created via the tenant API, then grepped out of etcd
+)
+
+// TestKamajiDataStore proves the documented consumer story end to end: an
+// operator-managed, full-mTLS EtcdCluster serves as the backing store of a
+// Kamaji DataStore, a TenantControlPlane comes up on it, its API answers,
+// and objects written through the tenant API land as keys in our etcd.
+func TestKamajiDataStore(t *testing.T) {
+	ctx := context.Background()
+	fixtures := fixturePaths(t)
+
+	// 00-namespace, 01-pki — namespace and the cert-manager PKI.
+	applyFixture(ctx, t, fixtures[0])
+	applyFixture(ctx, t, fixtures[1])
+	waitFor(ctx, t, 2*time.Minute, "PKI secrets issued", func(ctx context.Context) error {
+		if err := secretExists(e2eNamespace, "etcd-ca-tls", "tls.crt", "tls.key")(ctx); err != nil {
+			return err
+		}
+		return secretExists(e2eNamespace, "kamaji-etcd-client-tls", "tls.crt", "tls.key")(ctx)
+	})
+
+	// 02-etcdcluster — the cluster under test.
+	applyFixture(ctx, t, fixtures[2])
+	waitFor(ctx, t, 5*time.Minute, "EtcdCluster Available", etcdClusterAvailable(e2eNamespace, clusterName))
+	waitFor(ctx, t, 1*time.Minute, "operator-issued TLS secrets", func(ctx context.Context) error {
+		for _, name := range []string{
+			clusterName + "-server-tls",
+			clusterName + "-operator-client-tls",
+			clusterName + "-peer-tls",
+		} {
+			if err := secretExists(e2eNamespace, name, "tls.crt", "tls.key")(ctx); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	for _, svc := range []string{clusterName, clusterName + "-client"} {
+		if err := kube.Get(ctx, client.ObjectKey{Namespace: e2eNamespace, Name: svc}, &corev1.Service{}); err != nil {
+			t.Fatalf("expected Service %s/%s: %v", e2eNamespace, svc, err)
+		}
+	}
+
+	// 03-datastore, 04-tenantcontrolplane — the Kamaji side.
+	applyFixture(ctx, t, fixtures[3])
+	applyFixture(ctx, t, fixtures[4])
+	waitFor(ctx, t, 10*time.Minute, "TenantControlPlane Ready", func(ctx context.Context) error {
+		status, err := unstructuredField(ctx, "kamaji.clastix.io/v1alpha1", "TenantControlPlane",
+			e2eNamespace, tcpName, "status", "kubernetesResources", "version", "status")
+		if err != nil {
+			return err
+		}
+		if status != "Ready" {
+			return fmt.Errorf("status.kubernetesResources.version.status=%q", status)
+		}
+		return nil
+	})
+
+	// Reach the tenant API server through a port-forward and prove it works.
+	tenant, stop := tenantClient(ctx, t)
+	defer stop()
+
+	rt, err := rest.TransportFor(tenant)
+	if err != nil {
+		t.Fatalf("build tenant transport: %v", err)
+	}
+	code, body := httpGet(t, rt, tenant.Host+"/healthz")
+	if code != http.StatusOK || body != "ok" {
+		t.Fatalf("tenant /healthz: code=%d body=%q", code, body)
+	}
+	t.Log("tenant /healthz ok")
+
+	tenantSet, err := kubernetes.NewForConfig(tenant)
+	if err != nil {
+		t.Fatalf("build tenant clientset: %v", err)
+	}
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{Name: proofName, Namespace: "default"},
+		Data:       map[string]string{"written-by": "etcd-operator-e2e"},
+	}
+	if _, err := tenantSet.CoreV1().ConfigMaps("default").Create(ctx, cm, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
+		t.Fatalf("create ConfigMap via tenant API: %v", err)
+	}
+	got, err := tenantSet.CoreV1().ConfigMaps("default").Get(ctx, proofName, metav1.GetOptions{})
+	if err != nil || got.Data["written-by"] != "etcd-operator-e2e" {
+		t.Fatalf("read ConfigMap back via tenant API: %v (data=%v)", err, got.Data)
+	}
+	t.Log("tenant API ConfigMap roundtrip ok")
+
+	// The ConfigMap must exist as a key in *our* etcd — that is the whole
+	// point of the DataStore wiring.
+	keys := etcdKeys(ctx, t)
+	if !strings.Contains(keys, proofName) {
+		t.Fatalf("etcd key dump does not contain %q;\nfirst 2000 bytes:\n%s", proofName, truncate(keys, 2000))
+	}
+	t.Logf("found %q among etcd keys", proofName)
+
+	// Teardown — reverse order, waiting where deletion is asynchronous.
+	deleteAndWait(ctx, t, "kamaji.clastix.io/v1alpha1", "TenantControlPlane", e2eNamespace, tcpName, 5*time.Minute)
+	deleteAndWait(ctx, t, "kamaji.clastix.io/v1alpha1", "DataStore", "", "kamaji-e2e", 2*time.Minute)
+	ec := &etcdv1alpha2.EtcdCluster{ObjectMeta: metav1.ObjectMeta{Namespace: e2eNamespace, Name: clusterName}}
+	if err := kube.Delete(ctx, ec); err != nil && !apierrors.IsNotFound(err) {
+		t.Fatalf("delete EtcdCluster: %v", err)
+	}
+	waitFor(ctx, t, 5*time.Minute, "etcd pods gone", func(ctx context.Context) error {
+		pods := &corev1.PodList{}
+		if err := kube.List(ctx, pods, client.InNamespace(e2eNamespace),
+			client.MatchingLabels{"etcd-operator.cozystack.io/cluster": clusterName}); err != nil {
+			return err
+		}
+		if n := len(pods.Items); n > 0 {
+			return fmt.Errorf("%d etcd pods still present", n)
+		}
+		return nil
+	})
+}
+
+// tenantClient builds a rest.Config for the tenant API server: admin
+// kubeconfig from the Kamaji-generated Secret, transport rerouted through a
+// port-forward to the apiserver pod (the kubeconfig points at a ClusterIP
+// that is unreachable from outside the management cluster).
+func tenantClient(ctx context.Context, t *testing.T) (*rest.Config, func()) {
+	t.Helper()
+
+	secretName, err := unstructuredField(ctx, "kamaji.clastix.io/v1alpha1", "TenantControlPlane",
+		e2eNamespace, tcpName, "status", "kubeconfig", "admin", "secretName")
+	if err != nil {
+		t.Fatalf("read admin kubeconfig secret name: %v", err)
+	}
+	sec := &corev1.Secret{}
+	if err := kube.Get(ctx, client.ObjectKey{Namespace: e2eNamespace, Name: secretName}, sec); err != nil {
+		t.Fatalf("get kubeconfig secret %s: %v", secretName, err)
+	}
+	var kubeconfig []byte
+	for _, key := range []string{"super-admin.conf", "admin.conf", "super-admin.svc", "admin.svc"} {
+		if len(sec.Data[key]) > 0 {
+			kubeconfig = sec.Data[key]
+			break
+		}
+	}
+	if kubeconfig == nil {
+		t.Fatalf("no kubeconfig key in secret %s (keys: %v)", secretName, mapKeys(sec.Data))
+	}
+	tenant, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig)
+	if err != nil {
+		t.Fatalf("parse tenant kubeconfig: %v", err)
+	}
+
+	localPort, stop := portForwardAPIServer(ctx, t)
+
+	// Keep certificate verification honest: ServerName stays the original
+	// endpoint host (present in the apiserver cert SANs), only the dial
+	// target moves to the forwarded port.
+	host, _, err := net.SplitHostPort(strings.TrimPrefix(tenant.Host, "https://"))
+	if err != nil {
+		t.Fatalf("parse tenant host %q: %v", tenant.Host, err)
+	}
+	tenant.TLSClientConfig.ServerName = host
+	tenant.Host = fmt.Sprintf("https://127.0.0.1:%d", localPort)
+	return tenant, stop
+}
+
+// portForwardAPIServer forwards a random local port to port 6443 of the
+// first Ready tenant apiserver pod and returns the local port.
+func portForwardAPIServer(ctx context.Context, t *testing.T) (uint16, func()) {
+	t.Helper()
+
+	deploy, err := clientset.AppsV1().Deployments(e2eNamespace).Get(ctx, tcpName, metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("get tenant control plane deployment: %v", err)
+	}
+	selector := metav1.FormatLabelSelector(deploy.Spec.Selector)
+	pods, err := clientset.CoreV1().Pods(e2eNamespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
+	if err != nil || len(pods.Items) == 0 {
+		t.Fatalf("list tenant control plane pods (%s): %v", selector, err)
+	}
+	var podName string
+	for _, p := range pods.Items {
+		if p.Status.Phase == corev1.PodRunning {
+			podName = p.Name
+			break
+		}
+	}
+	if podName == "" {
+		t.Fatalf("no running tenant control plane pod among %d", len(pods.Items))
+	}
+
+	req := clientset.CoreV1().RESTClient().Post().
+		Resource("pods").Namespace(e2eNamespace).Name(podName).SubResource("portforward")
+	transport, upgrader, err := spdy.RoundTripperFor(cfg)
+	if err != nil {
+		t.Fatalf("build spdy roundtripper: %v", err)
+	}
+	dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", &url.URL{
+		Scheme: req.URL().Scheme, Host: req.URL().Host, Path: req.URL().Path,
+	})
+
+	stopCh := make(chan struct{})
+	readyCh := make(chan struct{})
+	fw, err := portforward.New(dialer, []string{"0:6443"}, stopCh, readyCh, nil, &testWriter{t})
+	if err != nil {
+		t.Fatalf("create port-forward: %v", err)
+	}
+	errCh := make(chan error, 1)
+	go func() { errCh <- fw.ForwardPorts() }()
+	select {
+	case <-readyCh:
+	case err := <-errCh:
+		t.Fatalf("port-forward to %s: %v", podName, err)
+	case <-time.After(30 * time.Second):
+		t.Fatalf("port-forward to %s: timeout", podName)
+	}
+	ports, err := fw.GetPorts()
+	if err != nil || len(ports) == 0 {
+		t.Fatalf("get forwarded ports: %v", err)
+	}
+	t.Logf("port-forward 127.0.0.1:%d -> %s:6443", ports[0].Local, podName)
+	return ports[0].Local, func() { close(stopCh) }
+}
+
+// etcdKeys dumps all keys from the etcd cluster by exec-ing etcdctl inside
+// a member pod, authenticating with the server keypair (issued with
+// clientAuth EKU for exactly this kind of loopback use). Member pods carry
+// operator-generated names, so the pod is found via the cluster label.
+func etcdKeys(ctx context.Context, t *testing.T) string {
+	t.Helper()
+	pods := &corev1.PodList{}
+	if err := kube.List(ctx, pods, client.InNamespace(e2eNamespace),
+		client.MatchingLabels{"etcd-operator.cozystack.io/cluster": clusterName}); err != nil || len(pods.Items) == 0 {
+		t.Fatalf("list etcd member pods: %v (found %d)", err, len(pods.Items))
+	}
+	stdout, stderr, err := podExec(ctx, e2eNamespace, pods.Items[0].Name, "etcd", []string{
+		"etcdctl",
+		"--endpoints=https://localhost:2379",
+		"--cert=/etc/etcd/tls/client/tls.crt",
+		"--key=/etc/etcd/tls/client/tls.key",
+		"--cacert=/etc/etcd/tls/client/ca.crt",
+		"get", "", "--prefix", "--keys-only",
+	})
+	if err != nil {
+		t.Fatalf("etcdctl key dump: %v (stderr: %s)", err, stderr)
+	}
+	return stdout
+}
+
+func podExec(ctx context.Context, namespace, pod, container string, command []string) (string, string, error) {
+	req := clientset.CoreV1().RESTClient().Post().
+		Resource("pods").Namespace(namespace).Name(pod).SubResource("exec").
+		VersionedParams(&corev1.PodExecOptions{
+			Container: container,
+			Command:   command,
+			Stdout:    true,
+			Stderr:    true,
+		}, scheme.ParameterCodec)
+	exec, err := remotecommand.NewSPDYExecutor(cfg, "POST", req.URL())
+	if err != nil {
+		return "", "", err
+	}
+	var stdout, stderr bytes.Buffer
+	err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{Stdout: &stdout, Stderr: &stderr})
+	return stdout.String(), stderr.String(), err
+}
+
+// deleteAndWait deletes an unstructured object and waits until it is gone.
+func deleteAndWait(ctx context.Context, t *testing.T, apiVersion, kind, namespace, name string, timeout time.Duration) {
+	t.Helper()
+	obj := &unstructured.Unstructured{}
+	obj.SetAPIVersion(apiVersion)
+	obj.SetKind(kind)
+	obj.SetNamespace(namespace)
+	obj.SetName(name)
+	if err := kube.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
+		t.Fatalf("delete %s %s/%s: %v", kind, namespace, name, err)
+	}
+	waitFor(ctx, t, timeout, fmt.Sprintf("%s %s/%s deleted", kind, namespace, name), func(ctx context.Context) error {
+		err := kube.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, obj.DeepCopy())
+		if apierrors.IsNotFound(err) {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		return fmt.Errorf("still present")
+	})
+}
+
+type testWriter struct{ t *testing.T }
+
+func (w *testWriter) Write(p []byte) (int, error) {
+	w.t.Logf("port-forward: %s", strings.TrimSpace(string(p)))
+	return len(p), nil
+}
+
+func mapKeys(m map[string][]byte) []string {
+	out := make([]string, 0, len(m))
+	for k := range m {
+		out = append(out, k)
+	}
+	return out
+}
+
+func truncate(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	return s[:n] + "…"
+}
diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go
new file mode 100644
index 00000000..0f38c300
--- /dev/null
+++ b/test/e2e/suite_test.go
@@ -0,0 +1,213 @@
+//go:build e2e
+
+// Package e2e holds integration tests that run against a real cluster
+// (kind in CI) with the operator, cert-manager and Kamaji installed.
+// hack/e2e.sh provisions all of that and then runs `make test-e2e`.
+package e2e
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"sort"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	apimeta "k8s.io/apimachinery/pkg/api/meta"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
+	utilyaml "k8s.io/apimachinery/pkg/util/yaml"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	etcdv1alpha2 "github.com/cozystack/etcd-operator/api/v1alpha2"
+)
+
+const fieldOwner = client.FieldOwner("etcd-operator-e2e")
+
+var (
+	cfg       *rest.Config
+	kube      client.Client
+	clientset *kubernetes.Clientset
+)
+
+func TestMain(m *testing.M) {
+	rules := clientcmd.NewDefaultClientConfigLoadingRules()
+	c, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, nil).ClientConfig()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: cannot load kubeconfig: %v\n", err)
+		os.Exit(1)
+	}
+	cfg = c
+
+	scheme := runtime.NewScheme()
+	if err := corev1.AddToScheme(scheme); err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
+		os.Exit(1)
+	}
+	if err := etcdv1alpha2.AddToScheme(scheme); err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
+		os.Exit(1)
+	}
+	kube, err = client.New(cfg, client.Options{Scheme: scheme})
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: cannot build client: %v\n", err)
+		os.Exit(1)
+	}
+	clientset, err = kubernetes.NewForConfig(cfg)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: cannot build clientset: %v\n", err)
+		os.Exit(1)
+	}
+
+	os.Exit(m.Run())
+}
+
+// applyFixture server-side-applies every document of a testdata YAML file.
+// Unknown-to-the-scheme kinds (cert-manager, Kamaji) go through as
+// unstructured, so only the CRDs have to exist on the cluster.
+func applyFixture(ctx context.Context, t *testing.T, path string) []*unstructured.Unstructured {
+	t.Helper()
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read fixture %s: %v", path, err)
+	}
+	var applied []*unstructured.Unstructured
+	dec := utilyaml.NewYAMLReader(bufio.NewReader(bytes.NewReader(raw)))
+	for {
+		doc, err := dec.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			t.Fatalf("parse fixture %s: %v", path, err)
+		}
+		obj := &unstructured.Unstructured{}
+		if err := utilyaml.Unmarshal(doc, &obj.Object); err != nil {
+			t.Fatalf("decode fixture %s: %v", path, err)
+		}
+		if len(obj.Object) == 0 { // comment-only document
+			continue
+		}
+		if err := kube.Patch(ctx, obj, client.Apply, fieldOwner, client.ForceOwnership); err != nil {
+			t.Fatalf("apply %s %s/%s from %s: %v",
+				obj.GetKind(), obj.GetNamespace(), obj.GetName(), path, err)
+		}
+		t.Logf("applied %s %s/%s", obj.GetKind(), obj.GetNamespace(), obj.GetName())
+		applied = append(applied, obj)
+	}
+	return applied
+}
+
+// fixturePaths returns testdata/*.yaml sorted by name; the two-digit
+// prefixes encode apply order.
+func fixturePaths(t *testing.T) []string {
+	t.Helper()
+	paths, err := filepath.Glob(filepath.Join("testdata", "*.yaml"))
+	if err != nil || len(paths) == 0 {
+		t.Fatalf("no fixtures found: %v", err)
+	}
+	sort.Strings(paths)
+	return paths
+}
+
+// waitFor polls fn until it returns nil or the timeout elapses. fn's last
+// error is included in the failure to make timeouts diagnosable.
+func waitFor(ctx context.Context, t *testing.T, timeout time.Duration, desc string, fn func(context.Context) error) {
+	t.Helper()
+	t.Logf("waiting up to %s for %s", timeout, desc)
+	deadline := time.Now().Add(timeout)
+	var lastErr error
+	for {
+		cctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+		lastErr = fn(cctx)
+		cancel()
+		if lastErr == nil {
+			return
+		}
+		if time.Now().After(deadline) {
+			t.Fatalf("timed out waiting for %s: %v", desc, lastErr)
+		}
+		select {
+		case <-ctx.Done():
+			t.Fatalf("context cancelled waiting for %s: %v (last: %v)", desc, ctx.Err(), lastErr)
+		case <-time.After(5 * time.Second):
+		}
+	}
+}
+
+// secretExists fails the wait round unless the Secret exists and carries
+// all the listed keys with non-empty values.
+func secretExists(namespace, name string, keys ...string) func(context.Context) error {
+	return func(ctx context.Context) error {
+		sec := &corev1.Secret{}
+		if err := kube.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, sec); err != nil {
+			return err
+		}
+		for _, k := range keys {
+			if len(sec.Data[k]) == 0 {
+				return fmt.Errorf("secret %s/%s missing key %q", namespace, name, k)
+			}
+		}
+		return nil
+	}
+}
+
+// etcdClusterAvailable checks the typed EtcdCluster Available condition.
+func etcdClusterAvailable(namespace, name string) func(context.Context) error {
+	return func(ctx context.Context) error {
+		ec := &etcdv1alpha2.EtcdCluster{}
+		if err := kube.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, ec); err != nil {
+			return err
+		}
+		cond := apimeta.FindStatusCondition(ec.Status.Conditions, etcdv1alpha2.ClusterAvailable)
+		if cond == nil {
+			return fmt.Errorf("condition %s not reported yet", etcdv1alpha2.ClusterAvailable)
+		}
+		if cond.Status != "True" {
+			return fmt.Errorf("condition %s=%s (%s): %s", cond.Type, cond.Status, cond.Reason, cond.Message)
+		}
+		return nil
+	}
+}
+
+// unstructuredField reads a dotted path from an unstructured object fetched
+// live from the cluster.
+func unstructuredField(ctx context.Context, apiVersion, kind, namespace, name string, fields ...string) (string, error) {
+	obj := &unstructured.Unstructured{}
+	obj.SetAPIVersion(apiVersion)
+	obj.SetKind(kind)
+	if err := kube.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, obj); err != nil {
+		return "", err
+	}
+	val, found, err := unstructured.NestedString(obj.Object, fields...)
+	if err != nil || !found {
+		return "", fmt.Errorf("field %v not found on %s %s/%s: %v", fields, kind, namespace, name, err)
+	}
+	return val, nil
+}
+
+// httpGet performs a plain GET against url with the given TLS-configured
+// transport and returns the body.
+func httpGet(t *testing.T, rt http.RoundTripper, url string) (int, string) {
+	t.Helper()
+	httpClient := &http.Client{Transport: rt, Timeout: 15 * time.Second}
+	resp, err := httpClient.Get(url)
+	if err != nil {
+		t.Fatalf("GET %s: %v", url, err)
+	}
+	defer resp.Body.Close()
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatalf("GET %s: read body: %v", url, err)
+	}
+	return resp.StatusCode, string(body)
+}
diff --git a/test/e2e/testdata/00-namespace.yaml b/test/e2e/testdata/00-namespace.yaml
new file mode 100644
index 00000000..0349908b
--- /dev/null
+++ b/test/e2e/testdata/00-namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: kamaji-e2e
diff --git a/test/e2e/testdata/01-pki.yaml b/test/e2e/testdata/01-pki.yaml
new file mode 100644
index 00000000..4bfc7d8c
--- /dev/null
+++ b/test/e2e/testdata/01-pki.yaml
@@ -0,0 +1,59 @@
+# PKI for the Kamaji DataStore scenario. One CA signs everything: the
+# operator's cert-manager-mode certificates (server, operator-client, peer)
+# and Kamaji's own client certificate. Kamaji additionally receives the CA
+# *private key* through the DataStore — its etcd driver signs a per-TCP
+# apiserver-etcd-client certificate with it, so etcd (which trusts this CA
+# for client auth) accepts tenant API servers without further wiring.
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: selfsigned
+  namespace: kamaji-e2e
+spec:
+  selfSigned: {}
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: etcd-ca
+  namespace: kamaji-e2e
+spec:
+  isCA: true
+  commonName: etcd-ca
+  secretName: etcd-ca-tls
+  duration: 87600h
+  usages:
+  - signing
+  - key encipherment
+  - cert sign
+  issuerRef:
+    name: selfsigned
+    kind: Issuer
+---
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: etcd-ca-issuer
+  namespace: kamaji-e2e
+spec:
+  ca:
+    secretName: etcd-ca-tls
+---
+# Client certificate Kamaji presents on its own dials (health checks,
+# datastore setup). CN=root mirrors Cozystack's packages/extra/etcd chart.
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: kamaji-etcd-client
+  namespace: kamaji-e2e
+spec:
+  commonName: root
+  secretName: kamaji-etcd-client-tls
+  duration: 87600h
+  usages:
+  - signing
+  - key encipherment
+  - client auth
+  issuerRef:
+    name: etcd-ca-issuer
+    kind: Issuer
diff --git a/test/e2e/testdata/02-etcdcluster.yaml b/test/e2e/testdata/02-etcdcluster.yaml
new file mode 100644
index 00000000..1cc3233e
--- /dev/null
+++ b/test/e2e/testdata/02-etcdcluster.yaml
@@ -0,0 +1,24 @@
+# Full-mTLS EtcdCluster in cert-manager mode. operatorClientIssuerRef turns
+# on etcd client-cert verification (--client-cert-auth) against the issuing
+# CA, which is the same CA the Kamaji DataStore signs tenant clients with.
+apiVersion: etcd-operator.cozystack.io/v1alpha2
+kind: EtcdCluster
+metadata:
+  name: etcd
+  namespace: kamaji-e2e
+spec:
+  replicas: 3
+  version: "3.6.11"
+  storage:
+    size: 1Gi
+  tls:
+    peer:
+      certManager:
+        issuerRef:
+          name: etcd-ca-issuer
+    client:
+      certManager:
+        serverIssuerRef:
+          name: etcd-ca-issuer
+        operatorClientIssuerRef:
+          name: etcd-ca-issuer
diff --git a/test/e2e/testdata/03-datastore.yaml b/test/e2e/testdata/03-datastore.yaml
new file mode 100644
index 00000000..f5f6a714
--- /dev/null
+++ b/test/e2e/testdata/03-datastore.yaml
@@ -0,0 +1,35 @@
+# Kamaji DataStore backed by the operator-managed cluster. Field layout is
+# the same as Cozystack's packages/extra/etcd/templates/datastore.yaml: the
+# CA entry carries both halves of the CA keypair (Kamaji signs per-TCP etcd
+# client certs with it), the clientCertificate is Kamaji's own identity.
+apiVersion: kamaji.clastix.io/v1alpha1
+kind: DataStore
+metadata:
+  name: kamaji-e2e
+spec:
+  driver: etcd
+  endpoints:
+  - etcd-client.kamaji-e2e.svc:2379
+  tlsConfig:
+    certificateAuthority:
+      certificate:
+        secretReference:
+          name: etcd-ca-tls
+          namespace: kamaji-e2e
+          keyPath: tls.crt
+      privateKey:
+        secretReference:
+          name: etcd-ca-tls
+          namespace: kamaji-e2e
+          keyPath: tls.key
+    clientCertificate:
+      certificate:
+        secretReference:
+          name: kamaji-etcd-client-tls
+          namespace: kamaji-e2e
+          keyPath: tls.crt
+      privateKey:
+        secretReference:
+          name: kamaji-etcd-client-tls
+          namespace: kamaji-e2e
+          keyPath: tls.key
diff --git a/test/e2e/testdata/04-tenantcontrolplane.yaml b/test/e2e/testdata/04-tenantcontrolplane.yaml
new file mode 100644
index 00000000..635256e4
--- /dev/null
+++ b/test/e2e/testdata/04-tenantcontrolplane.yaml
@@ -0,0 +1,22 @@
+# Minimal tenant control plane: one apiserver replica, ClusterIP service,
+# no konnectivity (the test attaches no worker nodes). The version must stay
+# within the skew supported by the pinned Kamaji release (hack/e2e.sh).
+apiVersion: kamaji.clastix.io/v1alpha1
+kind: TenantControlPlane
+metadata:
+  name: tenant1
+  namespace: kamaji-e2e
+spec:
+  dataStore: kamaji-e2e
+  controlPlane:
+    deployment:
+      replicas: 1
+    service:
+      serviceType: ClusterIP
+  kubernetes:
+    version: v1.30.2
+    kubelet:
+      cgroupfs: systemd
+  networkProfile:
+    port: 6443
+  addons: {}