Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name: CI

on:
# No PR branch filter: PRs here target main and integration branches (e.g.
# v1) alike; the old `master` filter matched nothing, so CI never ran on PRs.
pull_request:
branches: [ master ]
push:
branches: [ master ]
branches: [ main ]

jobs:
verify:
Expand Down
48 changes: 48 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: E2E

on:
# No PR branch filter: PRs in this repo target main as well as integration
# branches (e.g. v1), and the suite must gate all of them. A filter naming
# a branch PRs never target (this said `master`, which does not exist) means
# the workflow never runs at all.
#
# paths-ignore: a kind+cert-manager+Kamaji provisioning run costs ~30-45
# minutes; skip it for PRs that touch nothing the suite exercises
# (docs-only changes). Everything else — Go code, manifests, the harness,
# the workflows themselves — still gates.
pull_request:
paths-ignore:
- '**.md'
- 'docs/**'
push:
tags: [ "v*" ]
workflow_dispatch:

concurrency:
group: e2e-${{ github.ref }}
cancel-in-progress: true

jobs:
kamaji-datastore:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/checkout@v4

- uses: actions/setup-go@v5
with:
go-version-file: go.mod
cache: true

- name: e2e (kind + cert-manager + Kamaji)
# Provisions an ephemeral kind cluster, installs cert-manager and
# Kamaji at the versions pinned in hack/e2e.sh, builds the operator
# from this checkout and runs test/e2e — the Kamaji DataStore
# consumer scenario.
#
# On failure, hack/e2e.sh's EXIT trap dumps the cluster state
# (CRs, pods, operator and Kamaji logs) into this step's output
# BEFORE tearing the kind cluster down. Don't add a separate
# dump step here: by the time it would run, the trap has already
# deleted the cluster and every kubectl call would come up empty.
run: make e2e
4 changes: 3 additions & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ name: Build and Publish Docker Image

on:
push:
branches: [ master ]
# The default branch is main; the old `master` filter matched nothing,
# so images were never published.
branches: [ main ]

jobs:
build-and-publish:
Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ vet: ## Run go vet against code.
test: manifests generate fmt vet envtest ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out

.PHONY: test-e2e
test-e2e: ## Run the e2e suite against the current kubeconfig context (expects cert-manager, Kamaji and the operator installed; see hack/e2e.sh).
go test -tags e2e -count=1 ./test/e2e/ -v -timeout 30m

.PHONY: e2e
e2e: ## Provision a kind cluster with cert-manager and Kamaji, deploy the operator, run the e2e suite. KEEP_CLUSTER=1 keeps the cluster for debugging.
hack/e2e.sh

##@ Build

.PHONY: build
Expand Down
252 changes: 252 additions & 0 deletions cmd/kubectl-etcd/helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
package main

import (
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"errors"
"fmt"
"math/big"
"strings"
"testing"
"time"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
)

// ── Blocker #2: a failed/never-ready port-forward must not hang ──────────────

func TestAwaitForward_Ready(t *testing.T) {
ready := make(chan struct{}, 1)
close(ready)
if err := awaitForward(ready, make(chan error, 1), make(chan struct{}, 1), time.Second); err != nil {
t.Fatalf("ready forward should succeed, got %v", err)
}
}

func TestAwaitForward_ErrorBeforeReady(t *testing.T) {
// ForwardPorts returns an error without ever closing readyChan — the old
// code blocked on <-readyChan forever. awaitForward must return the error.
forwardErr := make(chan error, 1)
forwardErr <- fmt.Errorf("dial tcp: connection refused")
err := awaitForward(make(chan struct{}), forwardErr, make(chan struct{}, 1), time.Second)
if err == nil {
t.Fatal("a forward failure must return an error, not hang or succeed")
}
if !strings.Contains(err.Error(), "connection refused") {
t.Errorf("error should wrap the forward failure, got %v", err)
}
}

func TestAwaitForward_Timeout(t *testing.T) {
stop := make(chan struct{}, 1)
// Nothing ever signals ready and no error arrives → must time out, not hang.
err := awaitForward(make(chan struct{}), make(chan error, 1), stop, 10*time.Millisecond)
if err == nil {
t.Fatal("a never-ready forward must time out, not hang")
}
select {
case <-stop:
default:
t.Error("timeout must close stopChan to tear the forwarder down")
}
}

// ── Blocker #3: TLS discovery and credential loading ────────────────────────

// etcdTLSPod returns a Pod whose etcd container points --trusted-ca-file at a
// secret-backed volume mount, mirroring what the operator builds.
func etcdTLSPod() (*corev1.Pod, corev1.Container) {
c := corev1.Container{
Name: "etcd",
Command: []string{"etcd"},
Args: []string{"--trusted-ca-file=/etc/etcd/pki/ca/ca.crt"},
VolumeMounts: []corev1.VolumeMount{{Name: "ca", MountPath: "/etc/etcd/pki/ca"}},
}
pod := &corev1.Pod{
Spec: corev1.PodSpec{
Containers: []corev1.Container{c},
Volumes: []corev1.Volume{{
Name: "ca",
VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: "etcd-ca"}},
}},
},
}
return pod, c
}

func TestFindSecretNameForTLS_Happy(t *testing.T) {
pod, c := etcdTLSPod()
name, err := findSecretNameForTLS(pod, c)
if err != nil {
t.Fatal(err)
}
if name != "etcd-ca" {
t.Errorf("secret name = %q, want %q", name, "etcd-ca")
}
}

func TestFindSecretNameForTLS_NoCAFlag(t *testing.T) {
c := corev1.Container{Name: "etcd", Args: []string{"--listen-client-urls=http://0.0.0.0:2379"}}
pod := &corev1.Pod{Spec: corev1.PodSpec{Containers: []corev1.Container{c}}}
_, err := findSecretNameForTLS(pod, c)
if !errors.Is(err, errNoTrustedCAFile) {
t.Errorf("expected errNoTrustedCAFile sentinel, got %v", err)
}
}

func TestFindSecretNameForTLS_MountNotFound(t *testing.T) {
c := corev1.Container{Name: "etcd", Args: []string{"--trusted-ca-file=/nowhere/ca.crt"}}
pod := &corev1.Pod{Spec: corev1.PodSpec{Containers: []corev1.Container{c}}}
if _, err := findSecretNameForTLS(pod, c); err == nil {
t.Error("expected an error when no volume backs the CA path")
}
}

func TestLoadCredentials_Happy(t *testing.T) {
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "ns1"},
Data: map[string][]byte{
corev1.BasicAuthUsernameKey: []byte("root"),
corev1.BasicAuthPasswordKey: []byte("s3cret"),
},
})
u, p, err := loadCredentials(cs, "ns1", "creds")
if err != nil {
t.Fatal(err)
}
if u != "root" || p != "s3cret" {
t.Errorf("got %q/%q, want root/s3cret", u, p)
}
}

func TestLoadCredentials_DefaultsUsernameToRoot(t *testing.T) {
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "ns1"},
Data: map[string][]byte{corev1.BasicAuthPasswordKey: []byte("p")},
})
u, _, err := loadCredentials(cs, "ns1", "creds")
if err != nil {
t.Fatal(err)
}
if u != "root" {
t.Errorf("username default = %q, want root", u)
}
}

func TestLoadCredentials_MissingPassword(t *testing.T) {
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "ns1"},
Data: map[string][]byte{corev1.BasicAuthUsernameKey: []byte("root")},
})
if _, _, err := loadCredentials(cs, "ns1", "creds"); err == nil {
t.Error("expected an error when the password key is missing/empty")
}
}

func TestLoadCredentials_NamespacedRef(t *testing.T) {
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "creds", Namespace: "other"},
Data: map[string][]byte{corev1.BasicAuthPasswordKey: []byte("p")},
})
// Default namespace is ns1, but the "other/creds" ref must override it.
_, p, err := loadCredentials(cs, "ns1", "other/creds")
if err != nil || p != "p" {
t.Errorf("namespace/name ref not honored: p=%q err=%v", p, err)
}
}

func TestExtractTLSFiles_Happy(t *testing.T) {
cert, key := selfSignedPEM(t)
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "etcd-ca", Namespace: "ns1"},
Data: map[string][]byte{"ca.crt": cert, "tls.crt": cert, "tls.key": key},
})
pool, clientCert, err := extractTLSFiles(cs, "ns1", "etcd-ca")
if err != nil {
t.Fatal(err)
}
if pool == nil || clientCert == nil {
t.Fatal("expected a non-nil CA pool and client certificate")
}
}

func TestExtractTLSFiles_MissingCA(t *testing.T) {
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "etcd-ca", Namespace: "ns1"},
Data: map[string][]byte{"tls.crt": []byte("x"), "tls.key": []byte("y")},
})
if _, _, err := extractTLSFiles(cs, "ns1", "etcd-ca"); err == nil {
t.Error("expected an error when ca.crt is absent from the secret")
}
}

func TestGetTLSConfig_Plaintext(t *testing.T) {
c := corev1.Container{Name: "etcd", Args: []string{"--listen-client-urls=http://0.0.0.0:2379"}}
pod := &corev1.Pod{Spec: corev1.PodSpec{Containers: []corev1.Container{c}}}
cfg, err := getTLSConfig(fake.NewSimpleClientset(), pod, "ns1")
if err != nil {
t.Fatal(err)
}
if cfg != nil {
t.Errorf("a plaintext cluster should yield a nil TLS config, got %+v", cfg)
}
}

func TestGetTLSConfig_TLS(t *testing.T) {
cert, key := selfSignedPEM(t)
pod, _ := etcdTLSPod()
cs := fake.NewSimpleClientset(&corev1.Secret{
ObjectMeta: metav1.ObjectMeta{Name: "etcd-ca", Namespace: "ns1"},
Data: map[string][]byte{"ca.crt": cert, "tls.crt": cert, "tls.key": key},
})
cfg, err := getTLSConfig(cs, pod, "ns1")
if err != nil {
t.Fatal(err)
}
if cfg == nil {
t.Fatal("expected a TLS config for a TLS-enabled cluster")
}
if cfg.MinVersion != tls.VersionTLS12 {
t.Errorf("MinVersion = %d, want TLS 1.2 (%d)", cfg.MinVersion, tls.VersionTLS12)
}
if cfg.RootCAs == nil || len(cfg.Certificates) != 1 {
t.Error("expected a populated RootCAs pool and exactly one client certificate")
}
}

// selfSignedPEM returns a fresh self-signed certificate and its EC private key,
// PEM-encoded — enough to satisfy x509 parsing and tls.X509KeyPair in tests.
func selfSignedPEM(t *testing.T) (certPEM, keyPEM []byte) {
t.Helper()
priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
tmpl := &x509.Certificate{
SerialNumber: big.NewInt(1),
Subject: pkix.Name{CommonName: "test-etcd"},
NotBefore: time.Now().Add(-time.Hour),
NotAfter: time.Now().Add(time.Hour),
KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
BasicConstraintsValid: true,
IsCA: true,
}
der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &priv.PublicKey, priv)
if err != nil {
t.Fatalf("create certificate: %v", err)
}
certPEM = pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
keyDER, err := x509.MarshalECPrivateKey(priv)
if err != nil {
t.Fatalf("marshal key: %v", err)
}
keyPEM = pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})
return certPEM, keyPEM
}
5 changes: 4 additions & 1 deletion config/default/manager_auth_proxy_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ spec:
capabilities:
drop:
- "ALL"
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
# gcr.io/kubebuilder/kube-rbac-proxy is gone (the gcr.io/kubebuilder
# registry shut down in early 2025); quay.io/brancz is the upstream
# home of kube-rbac-proxy that kubebuilder pointed users to.
image: quay.io/brancz/kube-rbac-proxy:v0.18.2
args:
- "--secure-listen-address=0.0.0.0:8443"
- "--upstream=http://127.0.0.1:8080/"
Expand Down
13 changes: 12 additions & 1 deletion controllers/etcdcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,19 @@ func (r *EtcdClusterReconciler) bootstrap(
// Pod label on the first reconcile of the seed Pod, so the PDB
// selects it from creation rather than only after MemberList runs
// a few reconciles later.
//
// Written as a merge patch, not an Update: the member controller
// reconciles the seed the moment it is created and its own
// Status().Update races with this write — an optimistic-locked
// Update here loses that race with a conflict, and a cache-backed
// re-Get cannot recover (the informer may not even have the
// object yet). This stamp happens only in the create branch —
// losing it would leave the seed permanently IsVoter=false, and
// both learner memberID discovery and promotion filter their dial
// endpoints to voters, wedging every later scale-up.
stampBase := seed.DeepCopy()
seed.Status.IsVoter = true
if err := r.Status().Update(ctx, seed); err != nil {
if err := r.Status().Patch(ctx, seed, client.MergeFrom(stampBase)); err != nil {
return ctrl.Result{}, err
}
} else if !metav1.IsControlledBy(seed, cluster) {
Expand Down
Loading
Loading