From 1f6a84483a591b87201faf5c48a29ad8a098afaa Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Thu, 30 Apr 2026 16:33:12 -0700 Subject: [PATCH 1/9] feat: iroh DNS discovery controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds IrohDNSReconciler, gated by connector.iroh.dnsEnabled, that watches Connector resources across project clusters and maintains a 1-1 DNSRecordSet in a separately-configured downstream cluster carrying the iroh-format TXT record at "<recordPrefix>.<z32 EndpointId>.<baseDomain>". For each Connector the reconciler: - resolves spec.connectorClassName and only acts when the class controllerName is networking.datumapis.com/datum-connect (legacy) or networking.datumapis.com/iroh-quic-tunnel (current); - adds a networking.datumapis.com/iroh-dns-cleanup finalizer so cross-cluster DNSRecordSet cleanup runs even though OwnerReferences can't cross clusters; - reads status.connectionDetails.publicKey, hex-decodes the EndpointId and z-base-32 encodes it for the DNS lookup name; - builds a DNSRecordSet with one TXT entry per RFC 1464 attribute (relay=<home relay URL>, addr=<space-separated socket addresses>). IPv6 addresses are bracketed via net.JoinHostPort. Empty relay or empty address list → that entry is omitted; only when both are missing do we skip publishing entirely. - server-side-applies the DNSRecordSet via the downstream client, field-managed under "network-services-operator/iroh-dns"; - deletes the DNSRecordSet when the Connector is deleted, when the class no longer routes to iroh, or when the agent's status drops the connection details (so stale TXT doesn't linger). Wires into cmd/main.go behind the dnsEnabled gate, building the downstream client from IrohConnectorConfig.DownstreamRestConfig (empty path falls back to in-cluster, matching DiscoveryConfig). The dns.networking.miloapis.com/v1alpha1 scheme is already registered on the operator's scheme so it's reused for the downstream client. 
Tests cover the status-gating decision table, the full record-set contents (z32 vs hex boundary, IPv6 bracketing, labels, TTL, multiple addresses), the relay-only/addr-only branches, and the invalid hex error path. --- cmd/main.go | 20 ++ internal/controller/iroh_dns_controller.go | 294 ++++++++++++++++++ .../controller/iroh_dns_controller_test.go | 243 +++++++++++++++ 3 files changed, 557 insertions(+) create mode 100644 internal/controller/iroh_dns_controller.go create mode 100644 internal/controller/iroh_dns_controller_test.go diff --git a/cmd/main.go b/cmd/main.go index 28fc80d..a32b30f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -421,6 +421,26 @@ func main() { os.Exit(1) } + if serverConfig.Connector.Iroh.DNSEnabled { + irohRestCfg, err := serverConfig.Connector.Iroh.DownstreamRestConfig() + if err != nil { + setupLog.Error(err, "unable to load iroh dns downstream kubeconfig") + os.Exit(1) + } + irohDownstream, err := client.New(irohRestCfg, client.Options{Scheme: scheme}) + if err != nil { + setupLog.Error(err, "unable to build iroh dns downstream client") + os.Exit(1) + } + if err := (&controller.IrohDNSReconciler{ + Config: serverConfig, + Downstream: irohDownstream, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "IrohDNS") + os.Exit(1) + } + } + if serverConfig.Gateway.ShouldDeleteErroredChallenges() { if err := (&controller.ChallengeReconciler{ Config: serverConfig, diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go new file mode 100644 index 0000000..6787a0b --- /dev/null +++ b/internal/controller/iroh_dns_controller.go @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package controller + +import ( + "context" + "fmt" + "net" + "strconv" + "strings" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + 
"sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/cluster" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" + mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" + mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" + + dnsv1alpha1 "go.miloapis.com/dns-operator/api/v1alpha1" + + networkingv1alpha1 "go.datum.net/network-services-operator/api/v1alpha1" + "go.datum.net/network-services-operator/internal/config" + "go.datum.net/network-services-operator/internal/iroh" +) + +const ( + irohDNSFinalizer = "networking.datumapis.com/iroh-dns-cleanup" + irohDNSFieldManager = "network-services-operator/iroh-dns" +) + +// allowedIrohControllerNames is the set of ConnectorClass.spec.controllerName +// values for which we publish iroh DNS records. Both names refer to the same +// controller; "datum-connect" is the legacy name kept alive while older +// desktop builds churn out. +var allowedIrohControllerNames = map[string]struct{}{ + "networking.datumapis.com/datum-connect": {}, + "networking.datumapis.com/iroh-quic-tunnel": {}, +} + +// IrohDNSReconciler watches Connectors backed by an iroh-routed +// ConnectorClass and maintains a 1-1 DNSRecordSet in the configured +// downstream cluster carrying the iroh-format TXT record at +// "..". 
+type IrohDNSReconciler struct { + mgr mcmanager.Manager + Config config.NetworkServicesOperator + Downstream client.Client +} + +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors/finalizers,verbs=update +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectorclasses,verbs=get;list;watch + +func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx, "cluster", req.ClusterName) + ctx = log.IntoContext(ctx, logger) + + cl, err := r.mgr.GetCluster(ctx, req.ClusterName) + if err != nil { + return ctrl.Result{}, err + } + + var connector networkingv1alpha1.Connector + if err := cl.GetClient().Get(ctx, req.NamespacedName, &connector); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + dnsName := r.dnsRecordSetName(&connector) + + if !connector.DeletionTimestamp.IsZero() { + if !controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { + return ctrl.Result{}, nil + } + if err := r.deleteRecordSet(ctx, dnsName); err != nil { + return ctrl.Result{}, err + } + controllerutil.RemoveFinalizer(&connector, irohDNSFinalizer) + if err := cl.GetClient().Update(ctx, &connector); err != nil { + return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) + } + return ctrl.Result{}, nil + } + + matches, err := r.classRoutesToIroh(ctx, cl, &connector) + if err != nil { + return ctrl.Result{}, err + } + if !matches { + // Class doesn't route here. If we previously owned a record (finalizer + // present), tear it down and release the Connector. 
+ if controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { + if err := r.deleteRecordSet(ctx, dnsName); err != nil { + return ctrl.Result{}, err + } + controllerutil.RemoveFinalizer(&connector, irohDNSFinalizer) + if err := cl.GetClient().Update(ctx, &connector); err != nil { + return ctrl.Result{}, fmt.Errorf("release finalizer: %w", err) + } + } + return ctrl.Result{}, nil + } + + if !controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { + controllerutil.AddFinalizer(&connector, irohDNSFinalizer) + if err := cl.GetClient().Update(ctx, &connector); err != nil { + return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err) + } + // Requeue via the inevitable update event; nothing more to do this pass. + return ctrl.Result{}, nil + } + + desired, ok, err := r.buildDesiredRecordSet(&connector) + if err != nil { + return ctrl.Result{}, err + } + if !ok { + // Status not yet populated by the agent. Tear down any prior record so + // stale entries don't linger; reconcile fires again on status update. 
+ if err := r.deleteRecordSet(ctx, dnsName); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + if err := r.applyRecordSet(ctx, desired); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +func (r *IrohDNSReconciler) classRoutesToIroh(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) (bool, error) { + if connector.Spec.ConnectorClassName == "" { + return false, nil + } + var class networkingv1alpha1.ConnectorClass + if err := cl.GetClient().Get(ctx, client.ObjectKey{Name: connector.Spec.ConnectorClassName}, &class); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, fmt.Errorf("get connectorclass: %w", err) + } + _, ok := allowedIrohControllerNames[class.Spec.ControllerName] + return ok, nil +} + +func (r *IrohDNSReconciler) dnsRecordSetName(connector *networkingv1alpha1.Connector) client.ObjectKey { + return client.ObjectKey{ + Namespace: r.Config.Connector.Iroh.DNSZoneRef.Namespace, + Name: "iroh-" + string(connector.UID), + } +} + +// buildDesiredRecordSet builds the DNSRecordSet we want present in the +// downstream cluster. The second return value is false when the Connector +// status doesn't yet carry enough data to publish a useful record (no +// endpoint id at all, or neither relay nor any addresses). 
+func (r *IrohDNSReconciler) buildDesiredRecordSet(connector *networkingv1alpha1.Connector) (*dnsv1alpha1.DNSRecordSet, bool, error) { + if connector.Status.ConnectionDetails == nil || connector.Status.ConnectionDetails.PublicKey == nil { + return nil, false, nil + } + pk := connector.Status.ConnectionDetails.PublicKey + if pk.Id == "" { + return nil, false, nil + } + if pk.HomeRelay == "" && len(pk.Addresses) == 0 { + return nil, false, nil + } + + z32, err := iroh.EndpointHexToZ32(pk.Id) + if err != nil { + return nil, false, fmt.Errorf("encode endpoint id: %w", err) + } + + cfg := r.Config.Connector.Iroh + recordName := cfg.RecordPrefix + "." + z32 + ttl := int64(cfg.TTLSeconds) + key := r.dnsRecordSetName(connector) + + var entries []dnsv1alpha1.RecordEntry + if pk.HomeRelay != "" { + entries = append(entries, dnsv1alpha1.RecordEntry{ + Name: recordName, + TTL: &ttl, + TXT: &dnsv1alpha1.TXTRecordSpec{Content: "relay=" + pk.HomeRelay}, + }) + } + if len(pk.Addresses) > 0 { + entries = append(entries, dnsv1alpha1.RecordEntry{ + Name: recordName, + TTL: &ttl, + TXT: &dnsv1alpha1.TXTRecordSpec{Content: "addr=" + joinIrohAddresses(pk.Addresses)}, + }) + } + + drs := &dnsv1alpha1.DNSRecordSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: dnsv1alpha1.GroupVersion.String(), + Kind: "DNSRecordSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/managed-by": "network-services-operator", + "networking.datumapis.com/connector-uid": string(connector.UID), + "networking.datumapis.com/connector-namespace": connector.Namespace, + "networking.datumapis.com/connector-name": connector.Name, + }, + }, + Spec: dnsv1alpha1.DNSRecordSetSpec{ + DNSZoneRef: corev1.LocalObjectReference{Name: cfg.DNSZoneRef.Name}, + RecordType: dnsv1alpha1.RRTypeTXT, + Records: entries, + }, + } + return drs, true, nil +} + +// joinIrohAddresses formats a list of (address, port) tuples for the +// `addr=` TXT value iroh 
expects: socket addresses separated by single +// spaces, IPv6 addresses in bracketed form (RFC 3986). +func joinIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) string { + parts := make([]string, 0, len(addrs)) + for _, a := range addrs { + parts = append(parts, net.JoinHostPort(a.Address, strconv.Itoa(int(a.Port)))) + } + return strings.Join(parts, " ") +} + +func (r *IrohDNSReconciler) applyRecordSet(ctx context.Context, desired *dnsv1alpha1.DNSRecordSet) error { + if err := r.Downstream.Patch(ctx, desired, client.Apply, client.FieldOwner(irohDNSFieldManager), client.ForceOwnership); err != nil { + return fmt.Errorf("apply DNSRecordSet: %w", err) + } + return nil +} + +func (r *IrohDNSReconciler) deleteRecordSet(ctx context.Context, key client.ObjectKey) error { + drs := &dnsv1alpha1.DNSRecordSet{ObjectMeta: metav1.ObjectMeta{Name: key.Name, Namespace: key.Namespace}} + if err := r.Downstream.Delete(ctx, drs); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete DNSRecordSet: %w", err) + } + return nil +} + +// SetupWithManager wires the reconciler into the multicluster manager. Watches +// the upstream Connector and ConnectorClass; the downstream DNSRecordSet +// client is held directly on the reconciler since it lives in a different +// cluster than the manager. +func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { + r.mgr = mgr + + return mcbuilder.ControllerManagedBy(mgr). + For(&networkingv1alpha1.Connector{}). 
+ Watches( + &networkingv1alpha1.ConnectorClass{}, + func(clusterName string, cl cluster.Cluster) handler.TypedEventHandler[client.Object, mcreconcile.Request] { + return handler.TypedEnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []mcreconcile.Request { + logger := log.FromContext(ctx) + class, ok := obj.(*networkingv1alpha1.ConnectorClass) + if !ok { + return nil + } + var connectors networkingv1alpha1.ConnectorList + if err := cl.GetClient().List(ctx, &connectors); err != nil { + logger.Error(err, "list Connectors for ConnectorClass watch", "connectorClass", class.Name) + return nil + } + var requests []mcreconcile.Request + for i := range connectors.Items { + c := &connectors.Items[i] + if c.Spec.ConnectorClassName != class.Name { + continue + } + requests = append(requests, mcreconcile.Request{ + ClusterName: clusterName, + Request: ctrl.Request{NamespacedName: client.ObjectKeyFromObject(c)}, + }) + } + return requests + }) + }, + ). + Named("iroh-dns"). + Complete(r) +} diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go new file mode 100644 index 0000000..e62554b --- /dev/null +++ b/internal/controller/iroh_dns_controller_test.go @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package controller + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + dnsv1alpha1 "go.miloapis.com/dns-operator/api/v1alpha1" + + networkingv1alpha1 "go.datum.net/network-services-operator/api/v1alpha1" + "go.datum.net/network-services-operator/internal/config" +) + +// Real iroh public key from iroh-base/src/key.rs SecretKey.public, chosen +// because it has a known z32 form we can pin against. 
+const ( + testEndpointHex = "f120d52e42bfcee750508baf28900acac85ad3f397ab4bb653b32be505c32d39" + testEndpointZ32 = "6ropkm1nz98qqwnotqz1tryk3mrfiw9u16iwzp1usci6kbqdfwho" +) + +func newReconciler() *IrohDNSReconciler { + return &IrohDNSReconciler{ + Config: config.NetworkServicesOperator{ + Connector: config.ConnectorConfig{ + Iroh: config.IrohConnectorConfig{ + DNSEnabled: true, + RecordPrefix: "_iroh", + BaseDomain: "datumconnect.net", + TTLSeconds: 30, + DNSZoneRef: config.IrohDNSZoneRef{ + Namespace: "datum-dns", + Name: "datumconnect-net", + }, + }, + }, + }, + } +} + +func newConnector(pk *networkingv1alpha1.ConnectorConnectionDetailsPublicKey) *networkingv1alpha1.Connector { + c := &networkingv1alpha1.Connector{ + ObjectMeta: metav1.ObjectMeta{ + Name: "edge-1", + Namespace: "default", + UID: types.UID("00000000-0000-0000-0000-000000000abc"), + }, + Spec: networkingv1alpha1.ConnectorSpec{ + ConnectorClassName: "datum-connect", + }, + } + if pk != nil { + c.Status.ConnectionDetails = &networkingv1alpha1.ConnectorConnectionDetails{ + Type: networkingv1alpha1.PublicKeyConnectorConnectionType, + PublicKey: pk, + } + } + return c +} + +func TestBuildDesiredRecordSet_StatusGating(t *testing.T) { + tests := []struct { + name string + pk *networkingv1alpha1.ConnectorConnectionDetailsPublicKey + want bool + }{ + {name: "no connection details", pk: nil, want: false}, + {name: "no public key data — empty struct", pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{}, want: false}, + { + name: "id without relay or addresses", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{Id: testEndpointHex}, + want: false, + }, + { + name: "id with relay only — publishes", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: "https://relay.example.com", + }, + want: true, + }, + { + name: "id with addresses only — publishes", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + 
Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "192.0.2.1", Port: 8080}}, + }, + want: true, + }, + { + name: "id with both — publishes", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: "https://relay.example.com", + Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "192.0.2.1", Port: 8080}}, + }, + want: true, + }, + } + + r := newReconciler() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, ok, err := r.buildDesiredRecordSet(newConnector(tt.pk)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ok != tt.want { + t.Fatalf("ok = %v, want %v", ok, tt.want) + } + }) + } +} + +func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { + r := newReconciler() + conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: "https://relay.example.com", + Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: "192.0.2.1", Port: 8080}, + {Address: "2001:db8::1", Port: 9090}, + }, + }) + + drs, ok, err := r.buildDesiredRecordSet(conn) + if err != nil || !ok { + t.Fatalf("buildDesiredRecordSet failed: ok=%v err=%v", ok, err) + } + + wantName := "iroh-" + string(conn.UID) + if drs.Name != wantName { + t.Errorf("Name = %q, want %q", drs.Name, wantName) + } + if drs.Namespace != "datum-dns" { + t.Errorf("Namespace = %q, want %q", drs.Namespace, "datum-dns") + } + if drs.Spec.RecordType != dnsv1alpha1.RRTypeTXT { + t.Errorf("RecordType = %q, want %q", drs.Spec.RecordType, dnsv1alpha1.RRTypeTXT) + } + if drs.Spec.DNSZoneRef.Name != "datumconnect-net" { + t.Errorf("DNSZoneRef.Name = %q, want %q", drs.Spec.DNSZoneRef.Name, "datumconnect-net") + } + + wantRecordName := "_iroh." 
+ testEndpointZ32 + if len(drs.Spec.Records) != 2 { + t.Fatalf("Records count = %d, want 2 (relay + addr)", len(drs.Spec.Records)) + } + gotContents := []string{drs.Spec.Records[0].TXT.Content, drs.Spec.Records[1].TXT.Content} + wantContents := []string{ + "relay=https://relay.example.com", + "addr=192.0.2.1:8080 [2001:db8::1]:9090", + } + for i := range gotContents { + if gotContents[i] != wantContents[i] { + t.Errorf("Records[%d].TXT.Content = %q, want %q", i, gotContents[i], wantContents[i]) + } + if drs.Spec.Records[i].Name != wantRecordName { + t.Errorf("Records[%d].Name = %q, want %q", i, drs.Spec.Records[i].Name, wantRecordName) + } + if drs.Spec.Records[i].TTL == nil || *drs.Spec.Records[i].TTL != 30 { + t.Errorf("Records[%d].TTL = %v, want 30", i, drs.Spec.Records[i].TTL) + } + } + + for k, v := range map[string]string{ + "app.kubernetes.io/managed-by": "network-services-operator", + "networking.datumapis.com/connector-uid": string(conn.UID), + "networking.datumapis.com/connector-namespace": conn.Namespace, + "networking.datumapis.com/connector-name": conn.Name, + } { + if drs.Labels[k] != v { + t.Errorf("label %q = %q, want %q", k, drs.Labels[k], v) + } + } +} + +func TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry(t *testing.T) { + r := newReconciler() + conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: "https://relay.example.com", + }) + + drs, _, err := r.buildDesiredRecordSet(conn) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(drs.Spec.Records) != 1 { + t.Fatalf("Records count = %d, want 1 (relay only)", len(drs.Spec.Records)) + } + if drs.Spec.Records[0].TXT.Content != "relay=https://relay.example.com" { + t.Errorf("Content = %q", drs.Spec.Records[0].TXT.Content) + } +} + +func TestBuildDesiredRecordSet_InvalidEndpointId(t *testing.T) { + r := newReconciler() + conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: "not-hex", + 
HomeRelay: "https://relay.example.com", + }) + if _, _, err := r.buildDesiredRecordSet(conn); err == nil { + t.Fatal("expected error for non-hex endpoint id, got nil") + } +} + +func TestJoinIrohAddresses(t *testing.T) { + tests := []struct { + name string + addrs []networkingv1alpha1.PublicKeyConnectorAddress + want string + }{ + {name: "empty", addrs: nil, want: ""}, + { + name: "single ipv4", + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "192.0.2.1", Port: 8080}}, + want: "192.0.2.1:8080", + }, + { + name: "single ipv6 — bracketed", + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "2001:db8::1", Port: 9090}}, + want: "[2001:db8::1]:9090", + }, + { + name: "mixed ipv4 + ipv6", + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: "192.0.2.1", Port: 8080}, + {Address: "2001:db8::1", Port: 9090}, + }, + want: "192.0.2.1:8080 [2001:db8::1]:9090", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := joinIrohAddresses(tt.addrs); got != tt.want { + t.Errorf("joinIrohAddresses = %q, want %q", got, tt.want) + } + }) + } +} From a93bf06e7938ad36e2d42aba84a5247de4e14fdd Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Thu, 30 Apr 2026 20:03:06 -0700 Subject: [PATCH 2/9] fix: sort iroh addresses for stable TXT output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit joinIrohAddresses now sorts by (address, port) before joining. pk.Addresses originates from iroh-base's iter-over-set, which is not order-stable, so the same set of endpoints can show up in different orders across heartbeats. Without sorting we'd produce different "addr=" TXT content on each reconcile, every server-side apply would trip a write, dns-operator would re-PATCH PowerDNS — pure churn for no behavior change. The sort runs on a clone so the caller's slice is preserved (it's shared with watchers and other reconciler passes). 
Tests cover the normalization (reversed input → canonical output), same-address-different-port ordering, and non-mutation of the input. --- internal/controller/iroh_dns_controller.go | 19 ++++++++-- .../controller/iroh_dns_controller_test.go | 36 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 6787a0b..43c5518 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -3,9 +3,11 @@ package controller import ( + "cmp" "context" "fmt" "net" + "slices" "strconv" "strings" @@ -227,10 +229,21 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(connector *networkingv1alpha1. // joinIrohAddresses formats a list of (address, port) tuples for the // `addr=` TXT value iroh expects: socket addresses separated by single -// spaces, IPv6 addresses in bracketed form (RFC 3986). +// spaces, IPv6 addresses in bracketed form (RFC 3986). The input is +// sorted by (address, port) before joining so that an agent reporting +// the same set of endpoints in different orders across heartbeats — +// iroh-base's iter-over-set is not stable — produces the same TXT +// content and avoids spurious server-side-apply writes. 
func joinIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) string { - parts := make([]string, 0, len(addrs)) - for _, a := range addrs { + sorted := slices.Clone(addrs) + slices.SortFunc(sorted, func(a, b networkingv1alpha1.PublicKeyConnectorAddress) int { + return cmp.Or( + cmp.Compare(a.Address, b.Address), + cmp.Compare(a.Port, b.Port), + ) + }) + parts := make([]string, 0, len(sorted)) + for _, a := range sorted { parts = append(parts, net.JoinHostPort(a.Address, strconv.Itoa(int(a.Port)))) } return strings.Join(parts, " ") diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go index e62554b..26aa97e 100644 --- a/internal/controller/iroh_dns_controller_test.go +++ b/internal/controller/iroh_dns_controller_test.go @@ -232,6 +232,22 @@ func TestJoinIrohAddresses(t *testing.T) { }, want: "192.0.2.1:8080 [2001:db8::1]:9090", }, + { + name: "input order is normalized — agent may report in any order", + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: "2001:db8::1", Port: 9090}, + {Address: "192.0.2.1", Port: 8080}, + }, + want: "192.0.2.1:8080 [2001:db8::1]:9090", + }, + { + name: "same address different ports — sorted by port", + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: "192.0.2.1", Port: 9090}, + {Address: "192.0.2.1", Port: 8080}, + }, + want: "192.0.2.1:8080 192.0.2.1:9090", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -241,3 +257,23 @@ func TestJoinIrohAddresses(t *testing.T) { }) } } + +// TestJoinIrohAddresses_DoesNotMutateInput ensures we don't reorder the +// caller's slice — Connector.Status fields are shared with watchers and +// other reconciler passes. 
+func TestJoinIrohAddresses_DoesNotMutateInput(t *testing.T) { + original := []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: "2001:db8::1", Port: 9090}, + {Address: "192.0.2.1", Port: 8080}, + } + want := []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: "2001:db8::1", Port: 9090}, + {Address: "192.0.2.1", Port: 8080}, + } + _ = joinIrohAddresses(original) + for i := range want { + if original[i] != want[i] { + t.Fatalf("input was mutated at index %d: got %+v, want %+v", i, original[i], want[i]) + } + } +} From 36a6564ffb4c9a4d09fc9c2732db923583f8ccc2 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 10:15:25 -0700 Subject: [PATCH 3/9] refactor: drop unused baseDomain, add recordSuffix for nested origins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous baseDomain field was documented as "the suffix appended to the prefix and z32 EndpointId to form the full lookup name" but the controller never actually consumed it — record names were built solely from recordPrefix + z32 and dns-operator picked the origin from the DNSZone. baseDomain was a redundant config knob that, if disagreeing with the zone, would silently misconfigure. Replace it with recordSuffix: optional labels appended after the z32 EndpointId, before the zone origin. The DNSZone's spec.domainName remains the only source of authoritative origin truth (and the zone's metadata.name need not match its actual domain — they often won't). For "_iroh..connectors.example.com" with a DNSZone whose spec.domainName is example.com, set recordSuffix=connectors. For records directly under the zone root, leave recordSuffix empty. Validation drops the baseDomain check; only dnsZoneRef.{name,namespace} remain required when dnsEnabled is true. Tests updated; new test covers the empty-suffix branch so we don't regress to the old ".." naming. 
--- internal/config/config.go | 20 +++++++++-------- internal/config/config_test.go | 13 +++++------ internal/controller/iroh_dns_controller.go | 7 ++++++ .../controller/iroh_dns_controller_test.go | 22 +++++++++++++++++-- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 3876afc..200cae2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -177,18 +177,23 @@ type IrohConnectorConfig struct { DownstreamKubeconfigPath string `json:"downstreamKubeconfigPath,omitempty"` // DNSZoneRef references the DNSZone (in the downstream cluster) that - // owns the names this controller manages. + // owns the names this controller manages. The actual DNS origin used + // for the FQDN is the zone's spec.domainName, not its metadata.name — + // the two need not agree. DNSZoneRef IrohDNSZoneRef `json:"dnsZoneRef,omitempty"` - // RecordPrefix is the leading DNS label of the discovery name. - // iroh uses "_iroh" by convention. + // RecordPrefix is the leading DNS label of the discovery name. iroh + // requires "_iroh" by convention. // // +default="_iroh" RecordPrefix string `json:"recordPrefix,omitempty"` - // BaseDomain is the suffix appended to the prefix and z32 EndpointId - // to form the full lookup name "<recordPrefix>.<z32 EndpointId>.<baseDomain>". - BaseDomain string `json:"baseDomain,omitempty"` + // RecordSuffix is appended after the z32 EndpointId, before the zone + // origin. Use it to nest discovery records under additional labels + // (e.g. set "connectors" with a zone for "example.com" to publish at + // "_iroh.<z32 EndpointId>.connectors.example.com"). Empty means the records sit + // directly under the zone root. + RecordSuffix string `json:"recordSuffix,omitempty"` // TTLSeconds is the TTL written on each TXT record. 
// @@ -1108,9 +1113,6 @@ func (c *IrohConnectorConfig) validate() error { return nil } var errs []error - if c.BaseDomain == "" { - errs = append(errs, errors.New("baseDomain is required when dnsEnabled is true")) - } if c.DNSZoneRef.Name == "" { errs = append(errs, errors.New("dnsZoneRef.name is required when dnsEnabled is true")) } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index c0ad2b4..b8807ce 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -17,7 +17,6 @@ func TestNetworkServicesOperator_Validate_IrohDisabled(t *testing.T) { func TestNetworkServicesOperator_Validate_IrohEnabled(t *testing.T) { full := IrohConnectorConfig{ DNSEnabled: true, - BaseDomain: "datumconnect.net", DNSZoneRef: IrohDNSZoneRef{Namespace: "datum-dns", Name: "datumconnect-net"}, } @@ -27,11 +26,6 @@ func TestNetworkServicesOperator_Validate_IrohEnabled(t *testing.T) { wantSub string }{ {name: "all required fields set"}, - { - name: "missing baseDomain", - mutate: func(c *IrohConnectorConfig) { c.BaseDomain = "" }, - wantSub: "baseDomain is required", - }, { name: "missing dnsZoneRef.name", mutate: func(c *IrohConnectorConfig) { c.DNSZoneRef.Name = "" }, @@ -48,6 +42,10 @@ func TestNetworkServicesOperator_Validate_IrohEnabled(t *testing.T) { c.DownstreamKubeconfigPath = "" }, }, + { + name: "recordSuffix is optional (records sit under zone root)", + mutate: func(c *IrohConnectorConfig) { c.RecordSuffix = "" }, + }, } for _, tt := range tests { @@ -82,10 +80,9 @@ func TestNetworkServicesOperator_Validate_IrohEnabledAggregatesErrors(t *testing if err == nil { t.Fatal("expected error, got nil") } - // errors.Join joins distinct messages with newlines; all five required + // errors.Join joins distinct messages with newlines; both required // fields should be surfaced. 
for _, want := range []string{ - "baseDomain is required", "dnsZoneRef.name is required", "dnsZoneRef.namespace is required", } { diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 43c5518..65ce264 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -183,7 +183,14 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(connector *networkingv1alpha1. } cfg := r.Config.Connector.Iroh + // RecordEntry.Name is relative to the DNSZone — dns-operator + // qualifies it with the zone's spec.domainName at apply time. So + // we never include the zone origin here; we only express the labels + // that should sit between iroh's "_iroh" prefix and the zone root. recordName := cfg.RecordPrefix + "." + z32 + if cfg.RecordSuffix != "" { + recordName = recordName + "." + cfg.RecordSuffix + } ttl := int64(cfg.TTLSeconds) key := r.dnsRecordSetName(connector) diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go index 26aa97e..07b2e75 100644 --- a/internal/controller/iroh_dns_controller_test.go +++ b/internal/controller/iroh_dns_controller_test.go @@ -28,7 +28,7 @@ func newReconciler() *IrohDNSReconciler { Iroh: config.IrohConnectorConfig{ DNSEnabled: true, RecordPrefix: "_iroh", - BaseDomain: "datumconnect.net", + RecordSuffix: "connectors", TTLSeconds: 30, DNSZoneRef: config.IrohDNSZoneRef{ Namespace: "datum-dns", @@ -144,7 +144,7 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { t.Errorf("DNSZoneRef.Name = %q, want %q", drs.Spec.DNSZoneRef.Name, "datumconnect-net") } - wantRecordName := "_iroh." + testEndpointZ32 + wantRecordName := "_iroh." 
+ testEndpointZ32 + ".connectors" if len(drs.Spec.Records) != 2 { t.Fatalf("Records count = %d, want 2 (relay + addr)", len(drs.Spec.Records)) } @@ -196,6 +196,24 @@ func TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry(t *testing.T) { } } +func TestBuildDesiredRecordSet_EmptySuffixPutsRecordsUnderZoneRoot(t *testing.T) { + r := newReconciler() + r.Config.Connector.Iroh.RecordSuffix = "" + conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: "https://relay.example.com", + }) + + drs, _, err := r.buildDesiredRecordSet(conn) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + want := "_iroh." + testEndpointZ32 + if drs.Spec.Records[0].Name != want { + t.Errorf("record name = %q, want %q (no trailing suffix)", drs.Spec.Records[0].Name, want) + } +} + func TestBuildDesiredRecordSet_InvalidEndpointId(t *testing.T) { r := newReconciler() conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ From a0578ff0b3a54ed5b7b6d105e494cdf479bc2994 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 11:38:32 -0700 Subject: [PATCH 4/9] refactor: claim-based ownership keyed by iroh endpoint id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous design (1 DNSRecordSet per Connector UID) created duplicate downstream RecordSets when multiple Connectors shared an iroh keypair — observed in staging where four Connectors with the same z32 produced four DNSRecordSets fighting over the same DNS name, three stuck at PROGRAMMED=False. Multiple Connectors per keypair is legitimate (same agent registered against two projects, multi-device reuse) so the controller has to handle it. This change: - DNSRecordSet name is now iroh-, deterministic on the endpoint id rather than the Connector UID. Multiple Connectors that share a keypair collapse to one record at the iroh-DNS name they all need. 
- Reconcile loop is claim-based: Get downstream record →

    not found      : Create with our claim
                     (label iroh-dns-claimed-by-uid=<connector-uid>)
    found, ours    : SSA refresh content
    found, foreign : defer; surface IrohDNSPublished=False with
                     Reason=DeferredToOwner naming the foreign Connector
                     via cluster/namespace/name labels.

  AlreadyExists on Create is treated as "sibling raced"; the next
  reconcile lands in the foreign-claim branch.

- On Connector deletion (or class-no-longer-routes-to-iroh, or status
  loses connection details), we delete the DNSRecordSet only if we hold
  the claim. Foreign-owned records are untouched.

- IrohDNSPublished status condition added on Connector with reasons
  Owner / DeferredToOwner / Pending so users can see at the resource
  level which Connector is publishing.

- Downstream DNSRecordSet is now exposed via cluster.Cluster (was
  client.Client), wired through the manager's errgroup like the existing
  downstreamCluster. A WatchesRawSource via mcsource.Kind enqueues the
  owner Connector identified in the labels when the DNSRecordSet changes
  — primarily catching drift / external delete for the owner. Sibling
  handover (when an owner's record disappears, another Connector should
  take over) rides the existing Connector watch — multicluster-runtime's
  manager exposes GetCluster(name) but not enumeration, so a downstream
  event can't fan out to siblings across project clusters. Connector
  status updates (lease renewals) fire frequently enough that handover
  converges within seconds. Documented inline.

Tests: status gating, full record contents (asserts new label set:
iroh-dns-claimed-by-uid plus connector cluster/namespace/name),
empty/non-empty recordSuffix, partial record (relay-only), invalid hex
id, and a new test verifying two distinct Connectors with the same key
compute the same DNSRecordSet name (the load-bearing claim property).
Address sorting / IPv6 bracketing / non-mutation tests preserved.
Existing iroh- records from the v1 design are left orphaned and will be cleaned up manually. --- cmd/main.go | 13 +- internal/controller/iroh_dns_controller.go | 271 +++++++++++++----- .../controller/iroh_dns_controller_test.go | 71 ++++- 3 files changed, 269 insertions(+), 86 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index a32b30f..26747c8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -421,15 +421,18 @@ func main() { os.Exit(1) } + var irohDownstream cluster.Cluster if serverConfig.Connector.Iroh.DNSEnabled { irohRestCfg, err := serverConfig.Connector.Iroh.DownstreamRestConfig() if err != nil { setupLog.Error(err, "unable to load iroh dns downstream kubeconfig") os.Exit(1) } - irohDownstream, err := client.New(irohRestCfg, client.Options{Scheme: scheme}) + irohDownstream, err = cluster.New(irohRestCfg, func(o *cluster.Options) { + o.Scheme = scheme + }) if err != nil { - setupLog.Error(err, "unable to build iroh dns downstream client") + setupLog.Error(err, "unable to build iroh dns downstream cluster") os.Exit(1) } if err := (&controller.IrohDNSReconciler{ @@ -541,6 +544,12 @@ func main() { return ignoreCanceled(downstreamCluster.Start(ctx)) }) + if irohDownstream != nil { + g.Go(func() error { + return ignoreCanceled(irohDownstream.Start(ctx)) + }) + } + setupLog.Info("starting multicluster manager") g.Go(func() error { return ignoreCanceled(mgr.Start(ctx)) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 65ce264..8cd46ed 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -13,7 +13,9 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/cluster" @@ -23,6 +25,7 @@ 
import ( mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" + mcsource "sigs.k8s.io/multicluster-runtime/pkg/source" dnsv1alpha1 "go.miloapis.com/dns-operator/api/v1alpha1" @@ -34,6 +37,23 @@ import ( const ( irohDNSFinalizer = "networking.datumapis.com/iroh-dns-cleanup" irohDNSFieldManager = "network-services-operator/iroh-dns" + + // labels stamped on every DNSRecordSet we manage. Used both for + // observability and (more importantly) so the downstream watch + // can route an event back to the owning Connector across clusters. + irohDNSManagedByLabelValue = "network-services-operator-iroh-dns" + irohDNSClaimedByUIDLabel = "networking.datumapis.com/iroh-dns-claimed-by-uid" + irohDNSConnectorClusterLabel = "networking.datumapis.com/iroh-dns-connector-cluster" + irohDNSConnectorNamespaceLabel = "networking.datumapis.com/iroh-dns-connector-namespace" + irohDNSConnectorNameLabel = "networking.datumapis.com/iroh-dns-connector-name" + + // Connector status condition surfacing whether *this* Connector is the + // one publishing the iroh DNS record for its endpoint id. + connectorConditionIrohDNSPublished = "IrohDNSPublished" + + connectorReasonIrohOwner = "Owner" + connectorReasonIrohDeferredToOwner = "DeferredToOwner" + connectorReasonIrohPending = "Pending" ) // allowedIrohControllerNames is the set of ConnectorClass.spec.controllerName @@ -46,16 +66,20 @@ var allowedIrohControllerNames = map[string]struct{}{ } // IrohDNSReconciler watches Connectors backed by an iroh-routed -// ConnectorClass and maintains a 1-1 DNSRecordSet in the configured -// downstream cluster carrying the iroh-format TXT record at -// "..". +// ConnectorClass and maintains a single downstream DNSRecordSet per iroh +// endpoint id (z32-encoded public key) carrying the iroh DNS-discovery +// TXT records. Multiple Connectors that share the same iroh keypair (e.g. 
+// the same agent registered against two projects) collapse to one +// DNSRecordSet — the first to claim wins, and the loser surfaces a +// DeferredToOwner condition rather than fighting at the DNS layer. type IrohDNSReconciler struct { mgr mcmanager.Manager Config config.NetworkServicesOperator - Downstream client.Client + Downstream cluster.Cluster } // +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors/status,verbs=update;patch // +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors/finalizers,verbs=update // +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectorclasses,verbs=get;list;watch @@ -76,20 +100,8 @@ func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque return ctrl.Result{}, err } - dnsName := r.dnsRecordSetName(&connector) - if !connector.DeletionTimestamp.IsZero() { - if !controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { - return ctrl.Result{}, nil - } - if err := r.deleteRecordSet(ctx, dnsName); err != nil { - return ctrl.Result{}, err - } - controllerutil.RemoveFinalizer(&connector, irohDNSFinalizer) - if err := cl.GetClient().Update(ctx, &connector); err != nil { - return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) - } - return ctrl.Result{}, nil + return r.handleDeletion(ctx, cl, &connector) } matches, err := r.classRoutesToIroh(ctx, cl, &connector) @@ -97,18 +109,8 @@ func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque return ctrl.Result{}, err } if !matches { - // Class doesn't route here. If we previously owned a record (finalizer - // present), tear it down and release the Connector. 
- if controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { - if err := r.deleteRecordSet(ctx, dnsName); err != nil { - return ctrl.Result{}, err - } - controllerutil.RemoveFinalizer(&connector, irohDNSFinalizer) - if err := cl.GetClient().Update(ctx, &connector); err != nil { - return ctrl.Result{}, fmt.Errorf("release finalizer: %w", err) - } - } - return ctrl.Result{}, nil + // Class doesn't route here. If we previously held a claim, release it. + return ctrl.Result{}, r.releaseIfOwner(ctx, cl, &connector) } if !controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { @@ -116,29 +118,114 @@ func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque if err := cl.GetClient().Update(ctx, &connector); err != nil { return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err) } - // Requeue via the inevitable update event; nothing more to do this pass. return ctrl.Result{}, nil } - desired, ok, err := r.buildDesiredRecordSet(&connector) + desired, ok, err := r.buildDesiredRecordSet(req.ClusterName, &connector) if err != nil { return ctrl.Result{}, err } if !ok { - // Status not yet populated by the agent. Tear down any prior record so - // stale entries don't linger; reconcile fires again on status update. - if err := r.deleteRecordSet(ctx, dnsName); err != nil { + // Status not yet populated by the agent. If we previously claimed + // this endpoint id, release so a sibling can take over. + if err := r.releaseIfOwner(ctx, cl, &connector); err != nil { return ctrl.Result{}, err } - return ctrl.Result{}, nil + return ctrl.Result{}, r.setPublishedCondition(ctx, cl, &connector, metav1.ConditionFalse, connectorReasonIrohPending, "Connector status does not yet carry connection details.") + } + + return ctrl.Result{}, r.applyClaim(ctx, cl, &connector, desired) +} + +// applyClaim implements the claim-then-write loop: +// - Get the DNSRecordSet at the deterministic z32-derived name. +// - Not found → Create with our claim. 
AlreadyExists means a sibling beat +// us; we re-fetch and continue. +// - Found, our claim → SSA refresh content. +// - Found, foreign claim → defer (no write) and surface a status +// condition naming the owner. +func (r *IrohDNSReconciler) applyClaim(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector, desired *dnsv1alpha1.DNSRecordSet) error { + key := client.ObjectKeyFromObject(desired) + var existing dnsv1alpha1.DNSRecordSet + err := r.Downstream.GetClient().Get(ctx, key, &existing) + + switch { + case apierrors.IsNotFound(err): + if err := r.Downstream.GetClient().Create(ctx, desired); err != nil { + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create DNSRecordSet: %w", err) + } + // Sibling raced us. Refetch and fall through to the foreign-claim + // branch on next reconcile. + return nil + } + return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionTrue, connectorReasonIrohOwner, "Owns iroh DNS record.") + + case err != nil: + return fmt.Errorf("get DNSRecordSet: %w", err) + } + + currentClaim := existing.Labels[irohDNSClaimedByUIDLabel] + if currentClaim != string(connector.UID) { + ownerRef := existing.Labels[irohDNSConnectorClusterLabel] + "/" + existing.Labels[irohDNSConnectorNamespaceLabel] + "/" + existing.Labels[irohDNSConnectorNameLabel] + return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionFalse, connectorReasonIrohDeferredToOwner, + fmt.Sprintf("iroh DNS record is owned by Connector %s (uid %s).", ownerRef, currentClaim)) + } + + // We own it. SSA the desired content. 
+ if err := r.Downstream.GetClient().Patch(ctx, desired, client.Apply, client.FieldOwner(irohDNSFieldManager), client.ForceOwnership); err != nil { + return fmt.Errorf("apply DNSRecordSet: %w", err) } + return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionTrue, connectorReasonIrohOwner, "Owns iroh DNS record.") +} - if err := r.applyRecordSet(ctx, desired); err != nil { +// handleDeletion releases the claim (if held) and removes the finalizer. +func (r *IrohDNSReconciler) handleDeletion(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) (ctrl.Result, error) { + if !controllerutil.ContainsFinalizer(connector, irohDNSFinalizer) { + return ctrl.Result{}, nil + } + if err := r.releaseIfOwner(ctx, cl, connector); err != nil { return ctrl.Result{}, err } + controllerutil.RemoveFinalizer(connector, irohDNSFinalizer) + if err := cl.GetClient().Update(ctx, connector); err != nil { + return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) + } return ctrl.Result{}, nil } +// releaseIfOwner deletes the DNSRecordSet for this Connector's endpoint id +// only if we currently hold the claim. Otherwise it's a no-op (the foreign +// owner manages the record's lifecycle). +// +// We compute the DNSRecordSet name from the Connector's status — if the +// status is empty we have no z32 to derive, which means we never could +// have created a record in the first place, so there's nothing to release. 
+func (r *IrohDNSReconciler) releaseIfOwner(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) error { + z32, err := connectorEndpointZ32(connector) + if err != nil || z32 == "" { + return nil + } + key := client.ObjectKey{ + Namespace: r.Config.Connector.Iroh.DNSZoneRef.Namespace, + Name: irohDNSRecordSetName(z32), + } + var existing dnsv1alpha1.DNSRecordSet + if err := r.Downstream.GetClient().Get(ctx, key, &existing); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("get DNSRecordSet for release: %w", err) + } + if existing.Labels[irohDNSClaimedByUIDLabel] != string(connector.UID) { + return nil + } + if err := r.Downstream.GetClient().Delete(ctx, &existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete DNSRecordSet: %w", err) + } + return nil +} + func (r *IrohDNSReconciler) classRoutesToIroh(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) (bool, error) { if connector.Spec.ConnectorClassName == "" { return false, nil @@ -154,33 +241,39 @@ func (r *IrohDNSReconciler) classRoutesToIroh(ctx context.Context, cl cluster.Cl return ok, nil } -func (r *IrohDNSReconciler) dnsRecordSetName(connector *networkingv1alpha1.Connector) client.ObjectKey { - return client.ObjectKey{ - Namespace: r.Config.Connector.Iroh.DNSZoneRef.Namespace, - Name: "iroh-" + string(connector.UID), - } +// irohDNSRecordSetName returns the deterministic DNSRecordSet name for an +// iroh endpoint id. One name per endpoint id means multiple Connectors +// reusing the same key collapse onto a single record. +func irohDNSRecordSetName(z32 string) string { + return "iroh-" + z32 } -// buildDesiredRecordSet builds the DNSRecordSet we want present in the -// downstream cluster. The second return value is false when the Connector -// status doesn't yet carry enough data to publish a useful record (no -// endpoint id at all, or neither relay nor any addresses). 
-func (r *IrohDNSReconciler) buildDesiredRecordSet(connector *networkingv1alpha1.Connector) (*dnsv1alpha1.DNSRecordSet, bool, error) { +func connectorEndpointZ32(connector *networkingv1alpha1.Connector) (string, error) { if connector.Status.ConnectionDetails == nil || connector.Status.ConnectionDetails.PublicKey == nil { - return nil, false, nil + return "", nil } pk := connector.Status.ConnectionDetails.PublicKey if pk.Id == "" { - return nil, false, nil - } - if pk.HomeRelay == "" && len(pk.Addresses) == 0 { - return nil, false, nil + return "", nil } + return iroh.EndpointHexToZ32(pk.Id) +} - z32, err := iroh.EndpointHexToZ32(pk.Id) +// buildDesiredRecordSet builds the DNSRecordSet we want present in the +// downstream cluster. The second return value is false when the Connector +// status doesn't yet carry enough data to publish a useful record. +func (r *IrohDNSReconciler) buildDesiredRecordSet(clusterName string, connector *networkingv1alpha1.Connector) (*dnsv1alpha1.DNSRecordSet, bool, error) { + z32, err := connectorEndpointZ32(connector) if err != nil { return nil, false, fmt.Errorf("encode endpoint id: %w", err) } + if z32 == "" { + return nil, false, nil + } + pk := connector.Status.ConnectionDetails.PublicKey + if pk.HomeRelay == "" && len(pk.Addresses) == 0 { + return nil, false, nil + } cfg := r.Config.Connector.Iroh // RecordEntry.Name is relative to the DNSZone — dns-operator @@ -192,7 +285,6 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(connector *networkingv1alpha1. recordName = recordName + "." + cfg.RecordSuffix } ttl := int64(cfg.TTLSeconds) - key := r.dnsRecordSetName(connector) var entries []dnsv1alpha1.RecordEntry if pk.HomeRelay != "" { @@ -216,13 +308,14 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(connector *networkingv1alpha1. 
Kind: "DNSRecordSet", }, ObjectMeta: metav1.ObjectMeta{ - Name: key.Name, - Namespace: key.Namespace, + Name: irohDNSRecordSetName(z32), + Namespace: cfg.DNSZoneRef.Namespace, Labels: map[string]string{ - "app.kubernetes.io/managed-by": "network-services-operator", - "networking.datumapis.com/connector-uid": string(connector.UID), - "networking.datumapis.com/connector-namespace": connector.Namespace, - "networking.datumapis.com/connector-name": connector.Name, + "app.kubernetes.io/managed-by": irohDNSManagedByLabelValue, + irohDNSClaimedByUIDLabel: string(connector.UID), + irohDNSConnectorClusterLabel: clusterName, + irohDNSConnectorNamespaceLabel: connector.Namespace, + irohDNSConnectorNameLabel: connector.Name, }, }, Spec: dnsv1alpha1.DNSRecordSetSpec{ @@ -256,28 +349,59 @@ func joinIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) str return strings.Join(parts, " ") } -func (r *IrohDNSReconciler) applyRecordSet(ctx context.Context, desired *dnsv1alpha1.DNSRecordSet) error { - if err := r.Downstream.Patch(ctx, desired, client.Apply, client.FieldOwner(irohDNSFieldManager), client.ForceOwnership); err != nil { - return fmt.Errorf("apply DNSRecordSet: %w", err) +func (r *IrohDNSReconciler) setPublishedCondition(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector, status metav1.ConditionStatus, reason, message string) error { + cond := metav1.Condition{ + Type: connectorConditionIrohDNSPublished, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: connector.Generation, } - return nil -} - -func (r *IrohDNSReconciler) deleteRecordSet(ctx context.Context, key client.ObjectKey) error { - drs := &dnsv1alpha1.DNSRecordSet{ObjectMeta: metav1.ObjectMeta{Name: key.Name, Namespace: key.Namespace}} - if err := r.Downstream.Delete(ctx, drs); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("delete DNSRecordSet: %w", err) + if !apimeta.SetStatusCondition(&connector.Status.Conditions, cond) { + 
return nil + } + if err := cl.GetClient().Status().Update(ctx, connector); err != nil { + return fmt.Errorf("update connector status: %w", err) } return nil } -// SetupWithManager wires the reconciler into the multicluster manager. Watches -// the upstream Connector and ConnectorClass; the downstream DNSRecordSet -// client is held directly on the reconciler since it lives in a different -// cluster than the manager. +// SetupWithManager wires the reconciler. Watches: +// +// - Connector (For) and ConnectorClass (Watches) — the primary multicluster +// event sources. Connector activity (status updates from agent lease +// renewals, generation bumps) drives reconciles often enough that sibling +// handover after an owner releases happens within seconds. +// +// - DNSRecordSet on the downstream cluster — fires on changes to records we +// manage. The mapper enqueues the *current owner* Connector identified by +// the labels on the DNSRecordSet; this catches drift (e.g. external +// deletion) for the owner. Sibling handover does not flow through this +// watch — multicluster-runtime's manager exposes GetCluster(name) but no +// enumeration, so a downstream event can't fan out to siblings across +// project clusters. The Connector watch is what catches them. 
func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { r.mgr = mgr + downstreamSource := mcsource.Kind( + &dnsv1alpha1.DNSRecordSet{}, + func(_ string, _ cluster.Cluster) handler.TypedEventHandler[*dnsv1alpha1.DNSRecordSet, mcreconcile.Request] { + return handler.TypedEnqueueRequestsFromMapFunc(func(_ context.Context, drs *dnsv1alpha1.DNSRecordSet) []mcreconcile.Request { + clusterName := drs.Labels[irohDNSConnectorClusterLabel] + name := drs.Labels[irohDNSConnectorNameLabel] + ns := drs.Labels[irohDNSConnectorNamespaceLabel] + if name == "" || ns == "" { + return nil + } + return []mcreconcile.Request{{ + ClusterName: clusterName, + Request: ctrl.Request{NamespacedName: types.NamespacedName{Namespace: ns, Name: name}}, + }} + }) + }, + ) + downstreamClusterSource, _, _ := downstreamSource.ForCluster("", r.Downstream) + return mcbuilder.ControllerManagedBy(mgr). For(&networkingv1alpha1.Connector{}). Watches( @@ -309,6 +433,7 @@ func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { }) }, ). + WatchesRawSource(downstreamClusterSource). Named("iroh-dns"). 
Complete(r) } diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go index 07b2e75..54670e4 100644 --- a/internal/controller/iroh_dns_controller_test.go +++ b/internal/controller/iroh_dns_controller_test.go @@ -19,6 +19,9 @@ import ( const ( testEndpointHex = "f120d52e42bfcee750508baf28900acac85ad3f397ab4bb653b32be505c32d39" testEndpointZ32 = "6ropkm1nz98qqwnotqz1tryk3mrfiw9u16iwzp1usci6kbqdfwho" + + testClusterName = "test-cluster" + testConnectorUID = "00000000-0000-0000-0000-000000000abc" ) func newReconciler() *IrohDNSReconciler { @@ -45,7 +48,7 @@ func newConnector(pk *networkingv1alpha1.ConnectorConnectionDetailsPublicKey) *n ObjectMeta: metav1.ObjectMeta{ Name: "edge-1", Namespace: "default", - UID: types.UID("00000000-0000-0000-0000-000000000abc"), + UID: types.UID(testConnectorUID), }, Spec: networkingv1alpha1.ConnectorSpec{ ConnectorClassName: "datum-connect", @@ -103,7 +106,7 @@ func TestBuildDesiredRecordSet_StatusGating(t *testing.T) { r := newReconciler() for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - _, ok, err := r.buildDesiredRecordSet(newConnector(tt.pk)) + _, ok, err := r.buildDesiredRecordSet(testClusterName, newConnector(tt.pk)) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -125,12 +128,14 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { }, }) - drs, ok, err := r.buildDesiredRecordSet(conn) + drs, ok, err := r.buildDesiredRecordSet(testClusterName, conn) if err != nil || !ok { t.Fatalf("buildDesiredRecordSet failed: ok=%v err=%v", ok, err) } - wantName := "iroh-" + string(conn.UID) + // DNSRecordSet name is keyed by z32 endpoint id (one record per + // endpoint, not per Connector UID) — see claim-based ownership design. 
+ wantName := "iroh-" + testEndpointZ32 if drs.Name != wantName { t.Errorf("Name = %q, want %q", drs.Name, wantName) } @@ -165,11 +170,14 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { } } + // Labels track the claim and the owner Connector identity. The watch + // on downstream DNSRecordSet uses these to enqueue the owner on changes. for k, v := range map[string]string{ - "app.kubernetes.io/managed-by": "network-services-operator", - "networking.datumapis.com/connector-uid": string(conn.UID), - "networking.datumapis.com/connector-namespace": conn.Namespace, - "networking.datumapis.com/connector-name": conn.Name, + "app.kubernetes.io/managed-by": irohDNSManagedByLabelValue, + irohDNSClaimedByUIDLabel: testConnectorUID, + irohDNSConnectorClusterLabel: testClusterName, + irohDNSConnectorNamespaceLabel: conn.Namespace, + irohDNSConnectorNameLabel: conn.Name, } { if drs.Labels[k] != v { t.Errorf("label %q = %q, want %q", k, drs.Labels[k], v) @@ -184,7 +192,7 @@ func TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry(t *testing.T) { HomeRelay: "https://relay.example.com", }) - drs, _, err := r.buildDesiredRecordSet(conn) + drs, _, err := r.buildDesiredRecordSet(testClusterName, conn) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -204,7 +212,7 @@ func TestBuildDesiredRecordSet_EmptySuffixPutsRecordsUnderZoneRoot(t *testing.T) HomeRelay: "https://relay.example.com", }) - drs, _, err := r.buildDesiredRecordSet(conn) + drs, _, err := r.buildDesiredRecordSet(testClusterName, conn) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -220,11 +228,52 @@ func TestBuildDesiredRecordSet_InvalidEndpointId(t *testing.T) { Id: "not-hex", HomeRelay: "https://relay.example.com", }) - if _, _, err := r.buildDesiredRecordSet(conn); err == nil { + if _, _, err := r.buildDesiredRecordSet(testClusterName, conn); err == nil { t.Fatal("expected error for non-hex endpoint id, got nil") } } +// TestBuildDesiredRecordSet_TwoConnectorsSameKeyProduceSameName 
verifies the +// load-bearing claim-based property: two distinct Connectors that share an +// iroh keypair compute the same DNSRecordSet name. This is what lets the +// claim-based reconciler dedupe them. +func TestBuildDesiredRecordSet_TwoConnectorsSameKeyProduceSameName(t *testing.T) { + r := newReconciler() + pk := &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: "https://relay.example.com", + } + + a := newConnector(pk) + a.UID = types.UID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa") + a.Name = "edge-a" + + b := newConnector(pk) + b.UID = types.UID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb") + b.Name = "edge-b" + + drsA, _, err := r.buildDesiredRecordSet("cluster-a", a) + if err != nil { + t.Fatalf("buildDesiredRecordSet(a): %v", err) + } + drsB, _, err := r.buildDesiredRecordSet("cluster-b", b) + if err != nil { + t.Fatalf("buildDesiredRecordSet(b): %v", err) + } + + if drsA.Name != drsB.Name { + t.Errorf("expected matching DNSRecordSet name, got A=%q B=%q", drsA.Name, drsB.Name) + } + // Each Connector still stamps its own claim and identity labels — the + // reconciler uses these to detect "is this DNSRecordSet mine". 
+ if drsA.Labels[irohDNSClaimedByUIDLabel] == drsB.Labels[irohDNSClaimedByUIDLabel] { + t.Errorf("expected distinct claim labels, got both = %q", drsA.Labels[irohDNSClaimedByUIDLabel]) + } + if drsA.Labels[irohDNSConnectorClusterLabel] == drsB.Labels[irohDNSConnectorClusterLabel] { + t.Errorf("expected distinct cluster labels, got both = %q", drsA.Labels[irohDNSConnectorClusterLabel]) + } +} + func TestJoinIrohAddresses(t *testing.T) { tests := []struct { name string From 825225bc7e6bc37ba20e08f5a715cc38a35aedc5 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 13:05:18 -0700 Subject: [PATCH 5/9] fix: encode cluster name in label + watch Lease for sibling handover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related fixes pushed to staging by the earlier deploy were both broken: 1. The downstream DNSRecordSet Create kept failing with `metadata.labels: Invalid value: "/test-project-xp3tg3": a valid label must be ... [A-Za-z0-9][-A-Za-z0-9_.]* ...`. Multicluster- runtime cluster names start with "/", which is invalid as a raw k8s label value. Apply the same `cluster-` encoding pattern that downstreamclient/mappednamespace.go uses inline. Decode in the watch handler and the foreign-claim status message. 2. The reconciler couldn't make handover converge. When an owner Connector is deleted, sibling Connectors that share the same iroh keypair need to reconcile and take over. The Connector watch alone wasn't enough — the agent updates the Lease on every heartbeat but doesn't necessarily touch the Connector itself, so the Connector watch stayed silent for long stretches. Mirror the pattern from connector_controller.go and add `Watches(&Lease{}, EnqueueRequestForOwner(&Connector{}, OnlyControllerOwner()))`. Lease renewals now fire iroh-dns reconciles on the lease cadence (≤30s in staging), bounding handover time. 
Tests cover the encoded-form expectation in the labels assertion and add a round-trip TestEncodeDecodeIrohClusterLabel. --- internal/controller/iroh_dns_controller.go | 49 ++++++++++++++----- .../controller/iroh_dns_controller_test.go | 27 ++++++++-- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 8cd46ed..7aee66c 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -11,6 +11,7 @@ import ( "strconv" "strings" + coordinationv1 "k8s.io/api/coordination/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" @@ -23,6 +24,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" + mchandler "sigs.k8s.io/multicluster-runtime/pkg/handler" mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" mcsource "sigs.k8s.io/multicluster-runtime/pkg/source" @@ -167,7 +169,8 @@ func (r *IrohDNSReconciler) applyClaim(ctx context.Context, cl cluster.Cluster, currentClaim := existing.Labels[irohDNSClaimedByUIDLabel] if currentClaim != string(connector.UID) { - ownerRef := existing.Labels[irohDNSConnectorClusterLabel] + "/" + existing.Labels[irohDNSConnectorNamespaceLabel] + "/" + existing.Labels[irohDNSConnectorNameLabel] + ownerCluster := decodeIrohClusterLabel(existing.Labels[irohDNSConnectorClusterLabel]) + ownerRef := ownerCluster + "/" + existing.Labels[irohDNSConnectorNamespaceLabel] + "/" + existing.Labels[irohDNSConnectorNameLabel] return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionFalse, connectorReasonIrohDeferredToOwner, fmt.Sprintf("iroh DNS record is owned by Connector %s (uid %s).", ownerRef, currentClaim)) } @@ -248,6 +251,18 @@ func irohDNSRecordSetName(z32 string) 
string { return "iroh-" + z32 } +// encodeIrohClusterLabel mirrors the inline pattern in +// downstreamclient/mappednamespace.go: multicluster-runtime cluster +// names start with "/" (invalid as a k8s label value), so we map "/" +// to "_" and prefix "cluster-" to produce a label-safe form. +func encodeIrohClusterLabel(clusterName string) string { + return "cluster-" + strings.ReplaceAll(clusterName, "/", "_") +} + +func decodeIrohClusterLabel(label string) string { + return strings.TrimPrefix(strings.ReplaceAll(label, "_", "/"), "cluster-") +} + func connectorEndpointZ32(connector *networkingv1alpha1.Connector) (string, error) { if connector.Status.ConnectionDetails == nil || connector.Status.ConnectionDetails.PublicKey == nil { return "", nil @@ -313,7 +328,7 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(clusterName string, connector Labels: map[string]string{ "app.kubernetes.io/managed-by": irohDNSManagedByLabelValue, irohDNSClaimedByUIDLabel: string(connector.UID), - irohDNSConnectorClusterLabel: clusterName, + irohDNSConnectorClusterLabel: encodeIrohClusterLabel(clusterName), irohDNSConnectorNamespaceLabel: connector.Namespace, irohDNSConnectorNameLabel: connector.Name, }, @@ -369,17 +384,23 @@ func (r *IrohDNSReconciler) setPublishedCondition(ctx context.Context, cl cluste // SetupWithManager wires the reconciler. Watches: // // - Connector (For) and ConnectorClass (Watches) — the primary multicluster -// event sources. Connector activity (status updates from agent lease -// renewals, generation bumps) drives reconciles often enough that sibling -// handover after an owner releases happens within seconds. +// event sources. // -// - DNSRecordSet on the downstream cluster — fires on changes to records we -// manage. The mapper enqueues the *current owner* Connector identified by -// the labels on the DNSRecordSet; this catches drift (e.g. external -// deletion) for the owner. 
Sibling handover does not flow through this -// watch — multicluster-runtime's manager exposes GetCluster(name) but no +// - Lease (Watches with EnqueueRequestForOwner) — agent heartbeats renew the +// Connector's Lease on every interval; that update fires our reconcile +// even when the Connector itself hasn't changed. This is the load-bearing +// trigger for sibling handover: when an owner Connector is deleted and +// its DNSRecordSet is GC'd, every sibling's next lease renewal drives its +// reconcile, and one of them wins the Create race for the now-empty z32. +// Bound on handover ≈ leaseDurationSeconds. +// +// - DNSRecordSet on the downstream cluster — drift detection. Mapper +// enqueues the *current owner* Connector identified by the labels on the +// DNSRecordSet, catching cases like a manual external delete of the +// record. Sibling handover does NOT flow through this watch: +// multicluster-runtime's manager exposes GetCluster(name) but no // enumeration, so a downstream event can't fan out to siblings across -// project clusters. The Connector watch is what catches them. +// project clusters. That's the Lease watch's job. 
func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { r.mgr = mgr @@ -387,12 +408,12 @@ func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { &dnsv1alpha1.DNSRecordSet{}, func(_ string, _ cluster.Cluster) handler.TypedEventHandler[*dnsv1alpha1.DNSRecordSet, mcreconcile.Request] { return handler.TypedEnqueueRequestsFromMapFunc(func(_ context.Context, drs *dnsv1alpha1.DNSRecordSet) []mcreconcile.Request { - clusterName := drs.Labels[irohDNSConnectorClusterLabel] name := drs.Labels[irohDNSConnectorNameLabel] ns := drs.Labels[irohDNSConnectorNamespaceLabel] if name == "" || ns == "" { return nil } + clusterName := decodeIrohClusterLabel(drs.Labels[irohDNSConnectorClusterLabel]) return []mcreconcile.Request{{ ClusterName: clusterName, Request: ctrl.Request{NamespacedName: types.NamespacedName{Namespace: ns, Name: name}}, @@ -433,6 +454,10 @@ func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { }) }, ). + Watches( + &coordinationv1.Lease{}, + mchandler.EnqueueRequestForOwner(&networkingv1alpha1.Connector{}, handler.OnlyControllerOwner()), + ). WatchesRawSource(downstreamClusterSource). Named("iroh-dns"). Complete(r) diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go index 54670e4..667f830 100644 --- a/internal/controller/iroh_dns_controller_test.go +++ b/internal/controller/iroh_dns_controller_test.go @@ -20,8 +20,11 @@ const ( testEndpointHex = "f120d52e42bfcee750508baf28900acac85ad3f397ab4bb653b32be505c32d39" testEndpointZ32 = "6ropkm1nz98qqwnotqz1tryk3mrfiw9u16iwzp1usci6kbqdfwho" - testClusterName = "test-cluster" - testConnectorUID = "00000000-0000-0000-0000-000000000abc" + // multicluster-runtime cluster names start with "/" — set the test + // vector that way so the label encoding is exercised. 
+ testClusterName = "/test-project-staging" + testClusterNameEncoded = "cluster-_test-project-staging" + testConnectorUID = "00000000-0000-0000-0000-000000000abc" ) func newReconciler() *IrohDNSReconciler { @@ -175,7 +178,7 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { for k, v := range map[string]string{ "app.kubernetes.io/managed-by": irohDNSManagedByLabelValue, irohDNSClaimedByUIDLabel: testConnectorUID, - irohDNSConnectorClusterLabel: testClusterName, + irohDNSConnectorClusterLabel: testClusterNameEncoded, irohDNSConnectorNamespaceLabel: conn.Namespace, irohDNSConnectorNameLabel: conn.Name, } { @@ -185,6 +188,24 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { } } +func TestEncodeDecodeIrohClusterLabel(t *testing.T) { + tests := []string{ + "", + "/test-project-staging", + "/zachs-project-z5pegw", + "plain-no-slashes", + "/with/multiple/slashes", + } + for _, want := range tests { + t.Run(want, func(t *testing.T) { + got := decodeIrohClusterLabel(encodeIrohClusterLabel(want)) + if got != want { + t.Errorf("round-trip mismatch: encode(%q) -> decode = %q", want, got) + } + }) + } +} + func TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry(t *testing.T) { r := newReconciler() conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ From f7b1114230f1f3f4e013a5b1d20be5394dbb2912 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 13:12:40 -0700 Subject: [PATCH 6/9] lint: drop unused cluster.Cluster arg from releaseIfOwner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI lint flagged releaseIfOwner's cl parameter as unused — the function only writes to the downstream client, not the upstream cluster. Drop it from the signature and the three call sites. Also extracts a few repeated test literals (relay URL, sample IPv4 and IPv6 addresses) into package-level constants, since the new test cases pushed their occurrence counts past goconst's threshold. 
--- internal/controller/iroh_dns_controller.go | 8 +-- .../controller/iroh_dns_controller_test.go | 50 ++++++++++--------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 7aee66c..2b894c4 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -112,7 +112,7 @@ func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque } if !matches { // Class doesn't route here. If we previously held a claim, release it. - return ctrl.Result{}, r.releaseIfOwner(ctx, cl, &connector) + return ctrl.Result{}, r.releaseIfOwner(ctx, &connector) } if !controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { @@ -130,7 +130,7 @@ func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Reque if !ok { // Status not yet populated by the agent. If we previously claimed // this endpoint id, release so a sibling can take over. 
- if err := r.releaseIfOwner(ctx, cl, &connector); err != nil { + if err := r.releaseIfOwner(ctx, &connector); err != nil { return ctrl.Result{}, err } return ctrl.Result{}, r.setPublishedCondition(ctx, cl, &connector, metav1.ConditionFalse, connectorReasonIrohPending, "Connector status does not yet carry connection details.") @@ -187,7 +187,7 @@ func (r *IrohDNSReconciler) handleDeletion(ctx context.Context, cl cluster.Clust if !controllerutil.ContainsFinalizer(connector, irohDNSFinalizer) { return ctrl.Result{}, nil } - if err := r.releaseIfOwner(ctx, cl, connector); err != nil { + if err := r.releaseIfOwner(ctx, connector); err != nil { return ctrl.Result{}, err } controllerutil.RemoveFinalizer(connector, irohDNSFinalizer) @@ -204,7 +204,7 @@ func (r *IrohDNSReconciler) handleDeletion(ctx context.Context, cl cluster.Clust // We compute the DNSRecordSet name from the Connector's status — if the // status is empty we have no z32 to derive, which means we never could // have created a record in the first place, so there's nothing to release. 
-func (r *IrohDNSReconciler) releaseIfOwner(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) error { +func (r *IrohDNSReconciler) releaseIfOwner(ctx context.Context, connector *networkingv1alpha1.Connector) error { z32, err := connectorEndpointZ32(connector) if err != nil || z32 == "" { return nil diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go index 667f830..72b6d5a 100644 --- a/internal/controller/iroh_dns_controller_test.go +++ b/internal/controller/iroh_dns_controller_test.go @@ -25,6 +25,10 @@ const ( testClusterName = "/test-project-staging" testClusterNameEncoded = "cluster-_test-project-staging" testConnectorUID = "00000000-0000-0000-0000-000000000abc" + + testRelayURL = "https://relay.example.com" + testIPv4 = "192.0.2.1" + testIPv6 = "2001:db8::1" ) func newReconciler() *IrohDNSReconciler { @@ -83,7 +87,7 @@ func TestBuildDesiredRecordSet_StatusGating(t *testing.T) { name: "id with relay only — publishes", pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - HomeRelay: "https://relay.example.com", + HomeRelay: testRelayURL, }, want: true, }, @@ -91,7 +95,7 @@ func TestBuildDesiredRecordSet_StatusGating(t *testing.T) { name: "id with addresses only — publishes", pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "192.0.2.1", Port: 8080}}, + Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, }, want: true, }, @@ -99,8 +103,8 @@ func TestBuildDesiredRecordSet_StatusGating(t *testing.T) { name: "id with both — publishes", pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - HomeRelay: "https://relay.example.com", - Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "192.0.2.1", Port: 8080}}, + HomeRelay: testRelayURL, + Addresses: 
[]networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, }, want: true, }, @@ -124,10 +128,10 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { r := newReconciler() conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - HomeRelay: "https://relay.example.com", + HomeRelay: testRelayURL, Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: "192.0.2.1", Port: 8080}, - {Address: "2001:db8::1", Port: 9090}, + {Address: testIPv4, Port: 8080}, + {Address: testIPv6, Port: 9090}, }, }) @@ -210,7 +214,7 @@ func TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry(t *testing.T) { r := newReconciler() conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - HomeRelay: "https://relay.example.com", + HomeRelay: testRelayURL, }) drs, _, err := r.buildDesiredRecordSet(testClusterName, conn) @@ -230,7 +234,7 @@ func TestBuildDesiredRecordSet_EmptySuffixPutsRecordsUnderZoneRoot(t *testing.T) r.Config.Connector.Iroh.RecordSuffix = "" conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - HomeRelay: "https://relay.example.com", + HomeRelay: testRelayURL, }) drs, _, err := r.buildDesiredRecordSet(testClusterName, conn) @@ -247,7 +251,7 @@ func TestBuildDesiredRecordSet_InvalidEndpointId(t *testing.T) { r := newReconciler() conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: "not-hex", - HomeRelay: "https://relay.example.com", + HomeRelay: testRelayURL, }) if _, _, err := r.buildDesiredRecordSet(testClusterName, conn); err == nil { t.Fatal("expected error for non-hex endpoint id, got nil") @@ -262,7 +266,7 @@ func TestBuildDesiredRecordSet_TwoConnectorsSameKeyProduceSameName(t *testing.T) r := newReconciler() pk := &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ Id: testEndpointHex, - HomeRelay: "https://relay.example.com", + HomeRelay: testRelayURL, } a := 
newConnector(pk) @@ -304,35 +308,35 @@ func TestJoinIrohAddresses(t *testing.T) { {name: "empty", addrs: nil, want: ""}, { name: "single ipv4", - addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "192.0.2.1", Port: 8080}}, + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, want: "192.0.2.1:8080", }, { name: "single ipv6 — bracketed", - addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: "2001:db8::1", Port: 9090}}, + addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv6, Port: 9090}}, want: "[2001:db8::1]:9090", }, { name: "mixed ipv4 + ipv6", addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: "192.0.2.1", Port: 8080}, - {Address: "2001:db8::1", Port: 9090}, + {Address: testIPv4, Port: 8080}, + {Address: testIPv6, Port: 9090}, }, want: "192.0.2.1:8080 [2001:db8::1]:9090", }, { name: "input order is normalized — agent may report in any order", addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: "2001:db8::1", Port: 9090}, - {Address: "192.0.2.1", Port: 8080}, + {Address: testIPv6, Port: 9090}, + {Address: testIPv4, Port: 8080}, }, want: "192.0.2.1:8080 [2001:db8::1]:9090", }, { name: "same address different ports — sorted by port", addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: "192.0.2.1", Port: 9090}, - {Address: "192.0.2.1", Port: 8080}, + {Address: testIPv4, Port: 9090}, + {Address: testIPv4, Port: 8080}, }, want: "192.0.2.1:8080 192.0.2.1:9090", }, @@ -351,12 +355,12 @@ func TestJoinIrohAddresses(t *testing.T) { // other reconciler passes. 
func TestJoinIrohAddresses_DoesNotMutateInput(t *testing.T) { original := []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: "2001:db8::1", Port: 9090}, - {Address: "192.0.2.1", Port: 8080}, + {Address: testIPv6, Port: 9090}, + {Address: testIPv4, Port: 8080}, } want := []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: "2001:db8::1", Port: 9090}, - {Address: "192.0.2.1", Port: 8080}, + {Address: testIPv6, Port: 9090}, + {Address: testIPv4, Port: 8080}, } _ = joinIrohAddresses(original) for i := range want { From 14808fd28eb097c8b811f1784b29f53e522ad2e2 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 13:29:08 -0700 Subject: [PATCH 7/9] chore: align managed-by label value with platform convention Switch app.kubernetes.io/managed-by from "network-services-operator-iroh-dns" to "networking.datumapis.com" so iroh DNS records show up in the same selector queries as gateway-managed records (gateway_dns_controller.go uses the same value). --- internal/controller/iroh_dns_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 2b894c4..52a66f8 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -43,7 +43,7 @@ const ( // labels stamped on every DNSRecordSet we manage. Used both for // observability and (more importantly) so the downstream watch // can route an event back to the owning Connector across clusters. 
- irohDNSManagedByLabelValue = "network-services-operator-iroh-dns" + irohDNSManagedByLabelValue = "networking.datumapis.com" irohDNSClaimedByUIDLabel = "networking.datumapis.com/iroh-dns-claimed-by-uid" irohDNSConnectorClusterLabel = "networking.datumapis.com/iroh-dns-connector-cluster" irohDNSConnectorNamespaceLabel = "networking.datumapis.com/iroh-dns-connector-namespace" From f1ae5a725d45d89f2f3a3756e21c8c07e8b1a338 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 14:13:52 -0700 Subject: [PATCH 8/9] fix: emit one TXT entry per direct address (iroh parser compat) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit iroh's DNS parser at iroh-relay-0.95.1/src/endpoint_info.rs:307-312 runs SocketAddr::from_str on every IrohAttr::Addr value as-is and silently drops failures via .ok(): let ip_addrs = attrs .get(&IrohAttr::Addr) .into_iter() .flatten() .filter_map(|s| SocketAddr::from_str(s).ok()) .map(TransportAddr::Ip); It does not split on whitespace. iroh's serializer (line 511-520) emits one (Addr, addr.to_string()) attribute per IP. So the canonical wire form is multiple "addr=" TXT lines, not one "addr= ..." line. Our controller was packing all addresses into one space-separated TXT line. SocketAddr::from_str("A B C") fails, so iroh dropped the addr= record entirely and only saw the relay= entry. Confirmed in staging iroh-gateway logs: a successful DNS lookup against our origin returned EndpointInfo { addrs: {Relay(...)} } — direct addresses missing despite the TXT record carrying them. Fix: emit one RecordEntry per address. Sort order preserved via sortIrohAddresses so SSA stays a no-op when nothing changed. Tests updated: now asserting 3 records (relay + 2× addr) instead of 2. joinIrohAddresses retired in favor of sortIrohAddresses. Round-trip test now operates on sorted addresses, not joined string. 
--- internal/controller/iroh_dns_controller.go | 31 ++++----- .../controller/iroh_dns_controller_test.go | 64 +++++++++++-------- 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index 52a66f8..f77927d 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -309,11 +309,17 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(clusterName string, connector TXT: &dnsv1alpha1.TXTRecordSpec{Content: "relay=" + pk.HomeRelay}, }) } - if len(pk.Addresses) > 0 { + // One TXT entry per direct address. iroh's parser expects every + // IrohAttr::Addr value to be exactly one socket address — it calls + // SocketAddr::from_str on the value as-is and silently drops failures + // (iroh-relay-0.95.1/src/endpoint_info.rs:307-312). Joining multiple + // addrs with whitespace into a single TXT line makes the whole line + // fail to parse, so iroh sees no direct addresses. + for _, a := range sortIrohAddresses(pk.Addresses) { entries = append(entries, dnsv1alpha1.RecordEntry{ Name: recordName, TTL: &ttl, - TXT: &dnsv1alpha1.TXTRecordSpec{Content: "addr=" + joinIrohAddresses(pk.Addresses)}, + TXT: &dnsv1alpha1.TXTRecordSpec{Content: "addr=" + net.JoinHostPort(a.Address, strconv.Itoa(int(a.Port)))}, }) } @@ -342,14 +348,13 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(clusterName string, connector return drs, true, nil } -// joinIrohAddresses formats a list of (address, port) tuples for the -// `addr=` TXT value iroh expects: socket addresses separated by single -// spaces, IPv6 addresses in bracketed form (RFC 3986). The input is -// sorted by (address, port) before joining so that an agent reporting -// the same set of endpoints in different orders across heartbeats — -// iroh-base's iter-over-set is not stable — produces the same TXT -// content and avoids spurious server-side-apply writes. 
-func joinIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) string { +// sortIrohAddresses returns a deterministically-ordered copy of the +// input — by (address, port) lexicographic. The agent's +// iroh::Endpoint::endpoint_addr().ip_addrs() iterator is iter-over-set +// and not order-stable, so sorting here means the same set of +// endpoints produces the same DNS content across heartbeats and SSA +// stays a no-op when nothing actually changed. +func sortIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) []networkingv1alpha1.PublicKeyConnectorAddress { sorted := slices.Clone(addrs) slices.SortFunc(sorted, func(a, b networkingv1alpha1.PublicKeyConnectorAddress) int { return cmp.Or( @@ -357,11 +362,7 @@ func joinIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) str cmp.Compare(a.Port, b.Port), ) }) - parts := make([]string, 0, len(sorted)) - for _, a := range sorted { - parts = append(parts, net.JoinHostPort(a.Address, strconv.Itoa(int(a.Port)))) - } - return strings.Join(parts, " ") + return sorted } func (r *IrohDNSReconciler) setPublishedCondition(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector, status metav1.ConditionStatus, reason, message string) error { diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go index 72b6d5a..faded24 100644 --- a/internal/controller/iroh_dns_controller_test.go +++ b/internal/controller/iroh_dns_controller_test.go @@ -157,13 +157,22 @@ func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { } wantRecordName := "_iroh." + testEndpointZ32 + ".connectors" - if len(drs.Spec.Records) != 2 { - t.Fatalf("Records count = %d, want 2 (relay + addr)", len(drs.Spec.Records)) + // One TXT entry per attribute: relay + one addr per direct address. 
+ // iroh's parser SocketAddr::from_str(value) drops anything that isn't + // exactly one socket address, so we cannot pack multiple addrs into + // one space-separated value. + if len(drs.Spec.Records) != 3 { + t.Fatalf("Records count = %d, want 3 (relay + 2× addr)", len(drs.Spec.Records)) + } + gotContents := []string{ + drs.Spec.Records[0].TXT.Content, + drs.Spec.Records[1].TXT.Content, + drs.Spec.Records[2].TXT.Content, } - gotContents := []string{drs.Spec.Records[0].TXT.Content, drs.Spec.Records[1].TXT.Content} wantContents := []string{ "relay=https://relay.example.com", - "addr=192.0.2.1:8080 [2001:db8::1]:9090", + "addr=192.0.2.1:8080", + "addr=[2001:db8::1]:9090", } for i := range gotContents { if gotContents[i] != wantContents[i] { @@ -299,30 +308,17 @@ func TestBuildDesiredRecordSet_TwoConnectorsSameKeyProduceSameName(t *testing.T) } } -func TestJoinIrohAddresses(t *testing.T) { +func TestSortIrohAddresses(t *testing.T) { tests := []struct { name string addrs []networkingv1alpha1.PublicKeyConnectorAddress - want string + want []networkingv1alpha1.PublicKeyConnectorAddress }{ - {name: "empty", addrs: nil, want: ""}, + {name: "empty", addrs: nil, want: []networkingv1alpha1.PublicKeyConnectorAddress{}}, { name: "single ipv4", addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, - want: "192.0.2.1:8080", - }, - { - name: "single ipv6 — bracketed", - addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv6, Port: 9090}}, - want: "[2001:db8::1]:9090", - }, - { - name: "mixed ipv4 + ipv6", - addrs: []networkingv1alpha1.PublicKeyConnectorAddress{ - {Address: testIPv4, Port: 8080}, - {Address: testIPv6, Port: 9090}, - }, - want: "192.0.2.1:8080 [2001:db8::1]:9090", + want: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, }, { name: "input order is normalized — agent may report in any order", @@ -330,7 +326,10 @@ func TestJoinIrohAddresses(t *testing.T) { {Address: testIPv6, Port: 
9090}, {Address: testIPv4, Port: 8080}, }, - want: "192.0.2.1:8080 [2001:db8::1]:9090", + want: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: testIPv4, Port: 8080}, + {Address: testIPv6, Port: 9090}, + }, }, { name: "same address different ports — sorted by port", @@ -338,22 +337,31 @@ func TestJoinIrohAddresses(t *testing.T) { {Address: testIPv4, Port: 9090}, {Address: testIPv4, Port: 8080}, }, - want: "192.0.2.1:8080 192.0.2.1:9090", + want: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: testIPv4, Port: 8080}, + {Address: testIPv4, Port: 9090}, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := joinIrohAddresses(tt.addrs); got != tt.want { - t.Errorf("joinIrohAddresses = %q, want %q", got, tt.want) + got := sortIrohAddresses(tt.addrs) + if len(got) != len(tt.want) { + t.Fatalf("len = %d, want %d", len(got), len(tt.want)) + } + for i := range got { + if got[i] != tt.want[i] { + t.Errorf("sortIrohAddresses[%d] = %+v, want %+v", i, got[i], tt.want[i]) + } } }) } } -// TestJoinIrohAddresses_DoesNotMutateInput ensures we don't reorder the +// TestSortIrohAddresses_DoesNotMutateInput ensures we don't reorder the // caller's slice — Connector.Status fields are shared with watchers and // other reconciler passes. 
-func TestJoinIrohAddresses_DoesNotMutateInput(t *testing.T) { +func TestSortIrohAddresses_DoesNotMutateInput(t *testing.T) { original := []networkingv1alpha1.PublicKeyConnectorAddress{ {Address: testIPv6, Port: 9090}, {Address: testIPv4, Port: 8080}, @@ -362,7 +370,7 @@ func TestJoinIrohAddresses_DoesNotMutateInput(t *testing.T) { {Address: testIPv6, Port: 9090}, {Address: testIPv4, Port: 8080}, } - _ = joinIrohAddresses(original) + _ = sortIrohAddresses(original) for i := range want { if original[i] != want[i] { t.Fatalf("input was mutated at index %d: got %+v, want %+v", i, original[i], want[i]) From ec9e5e62b3a23d081329f9e3709465c4f95c5894 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Fri, 1 May 2026 14:18:34 -0700 Subject: [PATCH 9/9] lint: preallocate entries slice in buildDesiredRecordSet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI golangci-lint flagged the var declaration as preallocatable now that we append in a loop. Capacity is 1 (relay) + len(pk.Addresses) — overallocates by 1 when relay is empty, which is fine. --- internal/controller/iroh_dns_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go index f77927d..feb11ee 100644 --- a/internal/controller/iroh_dns_controller.go +++ b/internal/controller/iroh_dns_controller.go @@ -301,7 +301,7 @@ func (r *IrohDNSReconciler) buildDesiredRecordSet(clusterName string, connector } ttl := int64(cfg.TTLSeconds) - var entries []dnsv1alpha1.RecordEntry + entries := make([]dnsv1alpha1.RecordEntry, 0, 1+len(pk.Addresses)) if pk.HomeRelay != "" { entries = append(entries, dnsv1alpha1.RecordEntry{ Name: recordName,