diff --git a/cmd/main.go b/cmd/main.go index 28fc80d..26747c8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -421,6 +421,29 @@ func main() { os.Exit(1) } + var irohDownstream cluster.Cluster + if serverConfig.Connector.Iroh.DNSEnabled { + irohRestCfg, err := serverConfig.Connector.Iroh.DownstreamRestConfig() + if err != nil { + setupLog.Error(err, "unable to load iroh dns downstream kubeconfig") + os.Exit(1) + } + irohDownstream, err = cluster.New(irohRestCfg, func(o *cluster.Options) { + o.Scheme = scheme + }) + if err != nil { + setupLog.Error(err, "unable to build iroh dns downstream cluster") + os.Exit(1) + } + if err := (&controller.IrohDNSReconciler{ + Config: serverConfig, + Downstream: irohDownstream, + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "IrohDNS") + os.Exit(1) + } + } + if serverConfig.Gateway.ShouldDeleteErroredChallenges() { if err := (&controller.ChallengeReconciler{ Config: serverConfig, @@ -521,6 +544,12 @@ func main() { return ignoreCanceled(downstreamCluster.Start(ctx)) }) + if irohDownstream != nil { + g.Go(func() error { + return ignoreCanceled(irohDownstream.Start(ctx)) + }) + } + setupLog.Info("starting multicluster manager") g.Go(func() error { return ignoreCanceled(mgr.Start(ctx)) diff --git a/internal/config/config.go b/internal/config/config.go index 3876afc..200cae2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -177,18 +177,23 @@ type IrohConnectorConfig struct { DownstreamKubeconfigPath string `json:"downstreamKubeconfigPath,omitempty"` // DNSZoneRef references the DNSZone (in the downstream cluster) that - // owns the names this controller manages. + // owns the names this controller manages. The actual DNS origin used + // for the FQDN is the zone's spec.domainName, not its metadata.name — + // the two need not agree. DNSZoneRef IrohDNSZoneRef `json:"dnsZoneRef,omitempty"` - // RecordPrefix is the leading DNS label of the discovery name. 
- // iroh uses "_iroh" by convention. + // RecordPrefix is the leading DNS label of the discovery name. iroh + // requires "_iroh" by convention. // // +default="_iroh" RecordPrefix string `json:"recordPrefix,omitempty"` - // BaseDomain is the suffix appended to the prefix and z32 EndpointId - // to form the full lookup name "..". - BaseDomain string `json:"baseDomain,omitempty"` + // RecordSuffix is appended after the z32 EndpointId, before the zone + // origin. Use it to nest discovery records under additional labels + // (e.g. set "connectors" with a zone for "example.com" to publish at + // "_iroh..connectors.example.com"). Empty means the records sit + // directly under the zone root. + RecordSuffix string `json:"recordSuffix,omitempty"` // TTLSeconds is the TTL written on each TXT record. // @@ -1108,9 +1113,6 @@ func (c *IrohConnectorConfig) validate() error { return nil } var errs []error - if c.BaseDomain == "" { - errs = append(errs, errors.New("baseDomain is required when dnsEnabled is true")) - } if c.DNSZoneRef.Name == "" { errs = append(errs, errors.New("dnsZoneRef.name is required when dnsEnabled is true")) } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index c0ad2b4..b8807ce 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -17,7 +17,6 @@ func TestNetworkServicesOperator_Validate_IrohDisabled(t *testing.T) { func TestNetworkServicesOperator_Validate_IrohEnabled(t *testing.T) { full := IrohConnectorConfig{ DNSEnabled: true, - BaseDomain: "datumconnect.net", DNSZoneRef: IrohDNSZoneRef{Namespace: "datum-dns", Name: "datumconnect-net"}, } @@ -27,11 +26,6 @@ func TestNetworkServicesOperator_Validate_IrohEnabled(t *testing.T) { wantSub string }{ {name: "all required fields set"}, - { - name: "missing baseDomain", - mutate: func(c *IrohConnectorConfig) { c.BaseDomain = "" }, - wantSub: "baseDomain is required", - }, { name: "missing dnsZoneRef.name", mutate: func(c *IrohConnectorConfig) { 
c.DNSZoneRef.Name = "" }, @@ -48,6 +42,10 @@ func TestNetworkServicesOperator_Validate_IrohEnabled(t *testing.T) { c.DownstreamKubeconfigPath = "" }, }, + { + name: "recordSuffix is optional (records sit under zone root)", + mutate: func(c *IrohConnectorConfig) { c.RecordSuffix = "" }, + }, } for _, tt := range tests { @@ -82,10 +80,9 @@ func TestNetworkServicesOperator_Validate_IrohEnabledAggregatesErrors(t *testing if err == nil { t.Fatal("expected error, got nil") } - // errors.Join joins distinct messages with newlines; all five required + // errors.Join joins distinct messages with newlines; both required // fields should be surfaced. for _, want := range []string{ - "baseDomain is required", "dnsZoneRef.name is required", "dnsZoneRef.namespace is required", } { diff --git a/internal/controller/iroh_dns_controller.go b/internal/controller/iroh_dns_controller.go new file mode 100644 index 0000000..feb11ee --- /dev/null +++ b/internal/controller/iroh_dns_controller.go @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package controller + +import ( + "cmp" + "context" + "fmt" + "net" + "slices" + "strconv" + "strings" + + coordinationv1 "k8s.io/api/coordination/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/cluster" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" + mchandler "sigs.k8s.io/multicluster-runtime/pkg/handler" + mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" + mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" + mcsource "sigs.k8s.io/multicluster-runtime/pkg/source" + + 
dnsv1alpha1 "go.miloapis.com/dns-operator/api/v1alpha1" + + networkingv1alpha1 "go.datum.net/network-services-operator/api/v1alpha1" + "go.datum.net/network-services-operator/internal/config" + "go.datum.net/network-services-operator/internal/iroh" +) + +const ( + irohDNSFinalizer = "networking.datumapis.com/iroh-dns-cleanup" + irohDNSFieldManager = "network-services-operator/iroh-dns" + + // labels stamped on every DNSRecordSet we manage. Used both for + // observability and (more importantly) so the downstream watch + // can route an event back to the owning Connector across clusters. + irohDNSManagedByLabelValue = "networking.datumapis.com" + irohDNSClaimedByUIDLabel = "networking.datumapis.com/iroh-dns-claimed-by-uid" + irohDNSConnectorClusterLabel = "networking.datumapis.com/iroh-dns-connector-cluster" + irohDNSConnectorNamespaceLabel = "networking.datumapis.com/iroh-dns-connector-namespace" + irohDNSConnectorNameLabel = "networking.datumapis.com/iroh-dns-connector-name" + + // Connector status condition surfacing whether *this* Connector is the + // one publishing the iroh DNS record for its endpoint id. + connectorConditionIrohDNSPublished = "IrohDNSPublished" + + connectorReasonIrohOwner = "Owner" + connectorReasonIrohDeferredToOwner = "DeferredToOwner" + connectorReasonIrohPending = "Pending" +) + +// allowedIrohControllerNames is the set of ConnectorClass.spec.controllerName +// values for which we publish iroh DNS records. Both names refer to the same +// controller; "datum-connect" is the legacy name kept alive while older +// desktop builds churn out. +var allowedIrohControllerNames = map[string]struct{}{ + "networking.datumapis.com/datum-connect": {}, + "networking.datumapis.com/iroh-quic-tunnel": {}, +} + +// IrohDNSReconciler watches Connectors backed by an iroh-routed +// ConnectorClass and maintains a single downstream DNSRecordSet per iroh +// endpoint id (z32-encoded public key) carrying the iroh DNS-discovery +// TXT records. 
Multiple Connectors that share the same iroh keypair (e.g. +// the same agent registered against two projects) collapse to one +// DNSRecordSet — the first to claim wins, and the loser surfaces a +// DeferredToOwner condition rather than fighting at the DNS layer. +type IrohDNSReconciler struct { + mgr mcmanager.Manager + Config config.NetworkServicesOperator + Downstream cluster.Cluster +} + +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors/status,verbs=update;patch +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectors/finalizers,verbs=update +// +kubebuilder:rbac:groups=networking.datumapis.com,resources=connectorclasses,verbs=get;list;watch + +func (r *IrohDNSReconciler) Reconcile(ctx context.Context, req mcreconcile.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx, "cluster", req.ClusterName) + ctx = log.IntoContext(ctx, logger) + + cl, err := r.mgr.GetCluster(ctx, req.ClusterName) + if err != nil { + return ctrl.Result{}, err + } + + var connector networkingv1alpha1.Connector + if err := cl.GetClient().Get(ctx, req.NamespacedName, &connector); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + if !connector.DeletionTimestamp.IsZero() { + return r.handleDeletion(ctx, cl, &connector) + } + + matches, err := r.classRoutesToIroh(ctx, cl, &connector) + if err != nil { + return ctrl.Result{}, err + } + if !matches { + // Class doesn't route here. If we previously held a claim, release it. 
+ return ctrl.Result{}, r.releaseIfOwner(ctx, &connector) + } + + if !controllerutil.ContainsFinalizer(&connector, irohDNSFinalizer) { + controllerutil.AddFinalizer(&connector, irohDNSFinalizer) + if err := cl.GetClient().Update(ctx, &connector); err != nil { + return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err) + } + return ctrl.Result{}, nil + } + + desired, ok, err := r.buildDesiredRecordSet(req.ClusterName, &connector) + if err != nil { + return ctrl.Result{}, err + } + if !ok { + // Status not yet populated by the agent. If we previously claimed + // this endpoint id, release so a sibling can take over. + if err := r.releaseIfOwner(ctx, &connector); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, r.setPublishedCondition(ctx, cl, &connector, metav1.ConditionFalse, connectorReasonIrohPending, "Connector status does not yet carry connection details.") + } + + return ctrl.Result{}, r.applyClaim(ctx, cl, &connector, desired) +} + +// applyClaim implements the claim-then-write loop: +// - Get the DNSRecordSet at the deterministic z32-derived name. +// - Not found → Create with our claim. AlreadyExists means a sibling beat +// us; we re-fetch and continue. +// - Found, our claim → SSA refresh content. +// - Found, foreign claim → defer (no write) and surface a status +// condition naming the owner. +func (r *IrohDNSReconciler) applyClaim(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector, desired *dnsv1alpha1.DNSRecordSet) error { + key := client.ObjectKeyFromObject(desired) + var existing dnsv1alpha1.DNSRecordSet + err := r.Downstream.GetClient().Get(ctx, key, &existing) + + switch { + case apierrors.IsNotFound(err): + if err := r.Downstream.GetClient().Create(ctx, desired); err != nil { + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create DNSRecordSet: %w", err) + } + // Sibling raced us. Refetch and fall through to the foreign-claim + // branch on next reconcile. 
+ return nil + } + return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionTrue, connectorReasonIrohOwner, "Owns iroh DNS record.") + + case err != nil: + return fmt.Errorf("get DNSRecordSet: %w", err) + } + + currentClaim := existing.Labels[irohDNSClaimedByUIDLabel] + if currentClaim != string(connector.UID) { + ownerCluster := decodeIrohClusterLabel(existing.Labels[irohDNSConnectorClusterLabel]) + ownerRef := ownerCluster + "/" + existing.Labels[irohDNSConnectorNamespaceLabel] + "/" + existing.Labels[irohDNSConnectorNameLabel] + return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionFalse, connectorReasonIrohDeferredToOwner, + fmt.Sprintf("iroh DNS record is owned by Connector %s (uid %s).", ownerRef, currentClaim)) + } + + // We own it. SSA the desired content. + if err := r.Downstream.GetClient().Patch(ctx, desired, client.Apply, client.FieldOwner(irohDNSFieldManager), client.ForceOwnership); err != nil { + return fmt.Errorf("apply DNSRecordSet: %w", err) + } + return r.setPublishedCondition(ctx, cl, connector, metav1.ConditionTrue, connectorReasonIrohOwner, "Owns iroh DNS record.") +} + +// handleDeletion releases the claim (if held) and removes the finalizer. +func (r *IrohDNSReconciler) handleDeletion(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) (ctrl.Result, error) { + if !controllerutil.ContainsFinalizer(connector, irohDNSFinalizer) { + return ctrl.Result{}, nil + } + if err := r.releaseIfOwner(ctx, connector); err != nil { + return ctrl.Result{}, err + } + controllerutil.RemoveFinalizer(connector, irohDNSFinalizer) + if err := cl.GetClient().Update(ctx, connector); err != nil { + return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) + } + return ctrl.Result{}, nil +} + +// releaseIfOwner deletes the DNSRecordSet for this Connector's endpoint id +// only if we currently hold the claim. Otherwise it's a no-op (the foreign +// owner manages the record's lifecycle). 
+// +// We compute the DNSRecordSet name from the Connector's status — if the +// status is empty we have no z32 to derive, which means we never could +// have created a record in the first place, so there's nothing to release. +func (r *IrohDNSReconciler) releaseIfOwner(ctx context.Context, connector *networkingv1alpha1.Connector) error { + z32, err := connectorEndpointZ32(connector) + if err != nil || z32 == "" { + return nil + } + key := client.ObjectKey{ + Namespace: r.Config.Connector.Iroh.DNSZoneRef.Namespace, + Name: irohDNSRecordSetName(z32), + } + var existing dnsv1alpha1.DNSRecordSet + if err := r.Downstream.GetClient().Get(ctx, key, &existing); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("get DNSRecordSet for release: %w", err) + } + if existing.Labels[irohDNSClaimedByUIDLabel] != string(connector.UID) { + return nil + } + if err := r.Downstream.GetClient().Delete(ctx, &existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete DNSRecordSet: %w", err) + } + return nil +} + +func (r *IrohDNSReconciler) classRoutesToIroh(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector) (bool, error) { + if connector.Spec.ConnectorClassName == "" { + return false, nil + } + var class networkingv1alpha1.ConnectorClass + if err := cl.GetClient().Get(ctx, client.ObjectKey{Name: connector.Spec.ConnectorClassName}, &class); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, fmt.Errorf("get connectorclass: %w", err) + } + _, ok := allowedIrohControllerNames[class.Spec.ControllerName] + return ok, nil +} + +// irohDNSRecordSetName returns the deterministic DNSRecordSet name for an +// iroh endpoint id. One name per endpoint id means multiple Connectors +// reusing the same key collapse onto a single record. 
+func irohDNSRecordSetName(z32 string) string { + return "iroh-" + z32 +} + +// encodeIrohClusterLabel mirrors the inline pattern in +// downstreamclient/mappednamespace.go: multicluster-runtime cluster +// names start with "/" (invalid as a k8s label value), so we map "/" +// to "_" and prefix "cluster-" to produce a label-safe form. +func encodeIrohClusterLabel(clusterName string) string { + return "cluster-" + strings.ReplaceAll(clusterName, "/", "_") +} + +func decodeIrohClusterLabel(label string) string { + return strings.TrimPrefix(strings.ReplaceAll(label, "_", "/"), "cluster-") +} + +func connectorEndpointZ32(connector *networkingv1alpha1.Connector) (string, error) { + if connector.Status.ConnectionDetails == nil || connector.Status.ConnectionDetails.PublicKey == nil { + return "", nil + } + pk := connector.Status.ConnectionDetails.PublicKey + if pk.Id == "" { + return "", nil + } + return iroh.EndpointHexToZ32(pk.Id) +} + +// buildDesiredRecordSet builds the DNSRecordSet we want present in the +// downstream cluster. The second return value is false when the Connector +// status doesn't yet carry enough data to publish a useful record. +func (r *IrohDNSReconciler) buildDesiredRecordSet(clusterName string, connector *networkingv1alpha1.Connector) (*dnsv1alpha1.DNSRecordSet, bool, error) { + z32, err := connectorEndpointZ32(connector) + if err != nil { + return nil, false, fmt.Errorf("encode endpoint id: %w", err) + } + if z32 == "" { + return nil, false, nil + } + pk := connector.Status.ConnectionDetails.PublicKey + if pk.HomeRelay == "" && len(pk.Addresses) == 0 { + return nil, false, nil + } + + cfg := r.Config.Connector.Iroh + // RecordEntry.Name is relative to the DNSZone — dns-operator + // qualifies it with the zone's spec.domainName at apply time. So + // we never include the zone origin here; we only express the labels + // that should sit between iroh's "_iroh" prefix and the zone root. + recordName := cfg.RecordPrefix + "." 
+ z32 + if cfg.RecordSuffix != "" { + recordName = recordName + "." + cfg.RecordSuffix + } + ttl := int64(cfg.TTLSeconds) + + entries := make([]dnsv1alpha1.RecordEntry, 0, 1+len(pk.Addresses)) + if pk.HomeRelay != "" { + entries = append(entries, dnsv1alpha1.RecordEntry{ + Name: recordName, + TTL: &ttl, + TXT: &dnsv1alpha1.TXTRecordSpec{Content: "relay=" + pk.HomeRelay}, + }) + } + // One TXT entry per direct address. iroh's parser expects every + // IrohAttr::Addr value to be exactly one socket address — it calls + // SocketAddr::from_str on the value as-is and silently drops failures + // (iroh-relay-0.95.1/src/endpoint_info.rs:307-312). Joining multiple + // addrs with whitespace into a single TXT line makes the whole line + // fail to parse, so iroh sees no direct addresses. + for _, a := range sortIrohAddresses(pk.Addresses) { + entries = append(entries, dnsv1alpha1.RecordEntry{ + Name: recordName, + TTL: &ttl, + TXT: &dnsv1alpha1.TXTRecordSpec{Content: "addr=" + net.JoinHostPort(a.Address, strconv.Itoa(int(a.Port)))}, + }) + } + + drs := &dnsv1alpha1.DNSRecordSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: dnsv1alpha1.GroupVersion.String(), + Kind: "DNSRecordSet", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: irohDNSRecordSetName(z32), + Namespace: cfg.DNSZoneRef.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/managed-by": irohDNSManagedByLabelValue, + irohDNSClaimedByUIDLabel: string(connector.UID), + irohDNSConnectorClusterLabel: encodeIrohClusterLabel(clusterName), + irohDNSConnectorNamespaceLabel: connector.Namespace, + irohDNSConnectorNameLabel: connector.Name, + }, + }, + Spec: dnsv1alpha1.DNSRecordSetSpec{ + DNSZoneRef: corev1.LocalObjectReference{Name: cfg.DNSZoneRef.Name}, + RecordType: dnsv1alpha1.RRTypeTXT, + Records: entries, + }, + } + return drs, true, nil +} + +// sortIrohAddresses returns a deterministically-ordered copy of the +// input — by (address, port) lexicographic. 
The agent's +// iroh::Endpoint::endpoint_addr().ip_addrs() iterator is iter-over-set +// and not order-stable, so sorting here means the same set of +// endpoints produces the same DNS content across heartbeats and SSA +// stays a no-op when nothing actually changed. +func sortIrohAddresses(addrs []networkingv1alpha1.PublicKeyConnectorAddress) []networkingv1alpha1.PublicKeyConnectorAddress { + sorted := slices.Clone(addrs) + slices.SortFunc(sorted, func(a, b networkingv1alpha1.PublicKeyConnectorAddress) int { + return cmp.Or( + cmp.Compare(a.Address, b.Address), + cmp.Compare(a.Port, b.Port), + ) + }) + return sorted +} + +func (r *IrohDNSReconciler) setPublishedCondition(ctx context.Context, cl cluster.Cluster, connector *networkingv1alpha1.Connector, status metav1.ConditionStatus, reason, message string) error { + cond := metav1.Condition{ + Type: connectorConditionIrohDNSPublished, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: connector.Generation, + } + if !apimeta.SetStatusCondition(&connector.Status.Conditions, cond) { + return nil + } + if err := cl.GetClient().Status().Update(ctx, connector); err != nil { + return fmt.Errorf("update connector status: %w", err) + } + return nil +} + +// SetupWithManager wires the reconciler. Watches: +// +// - Connector (For) and ConnectorClass (Watches) — the primary multicluster +// event sources. +// +// - Lease (Watches with EnqueueRequestForOwner) — agent heartbeats renew the +// Connector's Lease on every interval; that update fires our reconcile +// even when the Connector itself hasn't changed. This is the load-bearing +// trigger for sibling handover: when an owner Connector is deleted and +// its DNSRecordSet is GC'd, every sibling's next lease renewal drives its +// reconcile, and one of them wins the Create race for the now-empty z32. +// Bound on handover ≈ leaseDurationSeconds. +// +// - DNSRecordSet on the downstream cluster — drift detection. 
Mapper +// enqueues the *current owner* Connector identified by the labels on the +// DNSRecordSet, catching cases like a manual external delete of the +// record. Sibling handover does NOT flow through this watch: +// multicluster-runtime's manager exposes GetCluster(name) but no +// enumeration, so a downstream event can't fan out to siblings across +// project clusters. That's the Lease watch's job. +func (r *IrohDNSReconciler) SetupWithManager(mgr mcmanager.Manager) error { + r.mgr = mgr + + downstreamSource := mcsource.Kind( + &dnsv1alpha1.DNSRecordSet{}, + func(_ string, _ cluster.Cluster) handler.TypedEventHandler[*dnsv1alpha1.DNSRecordSet, mcreconcile.Request] { + return handler.TypedEnqueueRequestsFromMapFunc(func(_ context.Context, drs *dnsv1alpha1.DNSRecordSet) []mcreconcile.Request { + name := drs.Labels[irohDNSConnectorNameLabel] + ns := drs.Labels[irohDNSConnectorNamespaceLabel] + if name == "" || ns == "" { + return nil + } + clusterName := decodeIrohClusterLabel(drs.Labels[irohDNSConnectorClusterLabel]) + return []mcreconcile.Request{{ + ClusterName: clusterName, + Request: ctrl.Request{NamespacedName: types.NamespacedName{Namespace: ns, Name: name}}, + }} + }) + }, + ) + downstreamClusterSource, _, _ := downstreamSource.ForCluster("", r.Downstream) + + return mcbuilder.ControllerManagedBy(mgr). + For(&networkingv1alpha1.Connector{}). 
+ Watches( + &networkingv1alpha1.ConnectorClass{}, + func(clusterName string, cl cluster.Cluster) handler.TypedEventHandler[client.Object, mcreconcile.Request] { + return handler.TypedEnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []mcreconcile.Request { + logger := log.FromContext(ctx) + class, ok := obj.(*networkingv1alpha1.ConnectorClass) + if !ok { + return nil + } + var connectors networkingv1alpha1.ConnectorList + if err := cl.GetClient().List(ctx, &connectors); err != nil { + logger.Error(err, "list Connectors for ConnectorClass watch", "connectorClass", class.Name) + return nil + } + var requests []mcreconcile.Request + for i := range connectors.Items { + c := &connectors.Items[i] + if c.Spec.ConnectorClassName != class.Name { + continue + } + requests = append(requests, mcreconcile.Request{ + ClusterName: clusterName, + Request: ctrl.Request{NamespacedName: client.ObjectKeyFromObject(c)}, + }) + } + return requests + }) + }, + ). + Watches( + &coordinationv1.Lease{}, + mchandler.EnqueueRequestForOwner(&networkingv1alpha1.Connector{}, handler.OnlyControllerOwner()), + ). + WatchesRawSource(downstreamClusterSource). + Named("iroh-dns"). + Complete(r) +} diff --git a/internal/controller/iroh_dns_controller_test.go b/internal/controller/iroh_dns_controller_test.go new file mode 100644 index 0000000..faded24 --- /dev/null +++ b/internal/controller/iroh_dns_controller_test.go @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package controller + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + dnsv1alpha1 "go.miloapis.com/dns-operator/api/v1alpha1" + + networkingv1alpha1 "go.datum.net/network-services-operator/api/v1alpha1" + "go.datum.net/network-services-operator/internal/config" +) + +// Real iroh public key from iroh-base/src/key.rs SecretKey.public, chosen +// because it has a known z32 form we can pin against. 
+const ( + testEndpointHex = "f120d52e42bfcee750508baf28900acac85ad3f397ab4bb653b32be505c32d39" + testEndpointZ32 = "6ropkm1nz98qqwnotqz1tryk3mrfiw9u16iwzp1usci6kbqdfwho" + + // multicluster-runtime cluster names start with "/" — set the test + // vector that way so the label encoding is exercised. + testClusterName = "/test-project-staging" + testClusterNameEncoded = "cluster-_test-project-staging" + testConnectorUID = "00000000-0000-0000-0000-000000000abc" + + testRelayURL = "https://relay.example.com" + testIPv4 = "192.0.2.1" + testIPv6 = "2001:db8::1" +) + +func newReconciler() *IrohDNSReconciler { + return &IrohDNSReconciler{ + Config: config.NetworkServicesOperator{ + Connector: config.ConnectorConfig{ + Iroh: config.IrohConnectorConfig{ + DNSEnabled: true, + RecordPrefix: "_iroh", + RecordSuffix: "connectors", + TTLSeconds: 30, + DNSZoneRef: config.IrohDNSZoneRef{ + Namespace: "datum-dns", + Name: "datumconnect-net", + }, + }, + }, + }, + } +} + +func newConnector(pk *networkingv1alpha1.ConnectorConnectionDetailsPublicKey) *networkingv1alpha1.Connector { + c := &networkingv1alpha1.Connector{ + ObjectMeta: metav1.ObjectMeta{ + Name: "edge-1", + Namespace: "default", + UID: types.UID(testConnectorUID), + }, + Spec: networkingv1alpha1.ConnectorSpec{ + ConnectorClassName: "datum-connect", + }, + } + if pk != nil { + c.Status.ConnectionDetails = &networkingv1alpha1.ConnectorConnectionDetails{ + Type: networkingv1alpha1.PublicKeyConnectorConnectionType, + PublicKey: pk, + } + } + return c +} + +func TestBuildDesiredRecordSet_StatusGating(t *testing.T) { + tests := []struct { + name string + pk *networkingv1alpha1.ConnectorConnectionDetailsPublicKey + want bool + }{ + {name: "no connection details", pk: nil, want: false}, + {name: "no public key data — empty struct", pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{}, want: false}, + { + name: "id without relay or addresses", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{Id: 
testEndpointHex}, + want: false, + }, + { + name: "id with relay only — publishes", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: testRelayURL, + }, + want: true, + }, + { + name: "id with addresses only — publishes", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, + }, + want: true, + }, + { + name: "id with both — publishes", + pk: &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: testRelayURL, + Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}}, + }, + want: true, + }, + } + + r := newReconciler() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, ok, err := r.buildDesiredRecordSet(testClusterName, newConnector(tt.pk)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ok != tt.want { + t.Fatalf("ok = %v, want %v", ok, tt.want) + } + }) + } +} + +func TestBuildDesiredRecordSet_RecordContents(t *testing.T) { + r := newReconciler() + conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{ + Id: testEndpointHex, + HomeRelay: testRelayURL, + Addresses: []networkingv1alpha1.PublicKeyConnectorAddress{ + {Address: testIPv4, Port: 8080}, + {Address: testIPv6, Port: 9090}, + }, + }) + + drs, ok, err := r.buildDesiredRecordSet(testClusterName, conn) + if err != nil || !ok { + t.Fatalf("buildDesiredRecordSet failed: ok=%v err=%v", ok, err) + } + + // DNSRecordSet name is keyed by z32 endpoint id (one record per + // endpoint, not per Connector UID) — see claim-based ownership design. 
+ wantName := "iroh-" + testEndpointZ32
+ if drs.Name != wantName {
+ t.Errorf("Name = %q, want %q", drs.Name, wantName)
+ }
+ if drs.Namespace != "datum-dns" {
+ t.Errorf("Namespace = %q, want %q", drs.Namespace, "datum-dns")
+ }
+ if drs.Spec.RecordType != dnsv1alpha1.RRTypeTXT {
+ t.Errorf("RecordType = %q, want %q", drs.Spec.RecordType, dnsv1alpha1.RRTypeTXT)
+ }
+ if drs.Spec.DNSZoneRef.Name != "datumconnect-net" {
+ t.Errorf("DNSZoneRef.Name = %q, want %q", drs.Spec.DNSZoneRef.Name, "datumconnect-net")
+ }
+
+ wantRecordName := "_iroh." + testEndpointZ32 + ".connectors"
+ // One TXT entry per attribute: relay + one addr per direct address.
+ // iroh's parser SocketAddr::from_str(value) drops anything that isn't
+ // exactly one socket address, so we cannot pack multiple addrs into
+ // one space-separated value.
+ if len(drs.Spec.Records) != 3 {
+ t.Fatalf("Records count = %d, want 3 (relay + 2× addr)", len(drs.Spec.Records))
+ }
+ gotContents := []string{
+ drs.Spec.Records[0].TXT.Content,
+ drs.Spec.Records[1].TXT.Content,
+ drs.Spec.Records[2].TXT.Content,
+ }
+ // Order is asserted exactly: relay first, then addrs as normalized by
+ // sortIrohAddresses (see TestSortIrohAddresses below — IPv4 before
+ // IPv6 in these fixtures).
+ wantContents := []string{
+ "relay=https://relay.example.com",
+ "addr=192.0.2.1:8080",
+ "addr=[2001:db8::1]:9090",
+ }
+ for i := range gotContents {
+ if gotContents[i] != wantContents[i] {
+ t.Errorf("Records[%d].TXT.Content = %q, want %q", i, gotContents[i], wantContents[i])
+ }
+ if drs.Spec.Records[i].Name != wantRecordName {
+ t.Errorf("Records[%d].Name = %q, want %q", i, drs.Spec.Records[i].Name, wantRecordName)
+ }
+ if drs.Spec.Records[i].TTL == nil || *drs.Spec.Records[i].TTL != 30 {
+ t.Errorf("Records[%d].TTL = %v, want 30", i, drs.Spec.Records[i].TTL)
+ }
+ }
+
+ // Labels track the claim and the owner Connector identity. The watch
+ // on downstream DNSRecordSet uses these to enqueue the owner on changes. 
+ for k, v := range map[string]string{
+ "app.kubernetes.io/managed-by": irohDNSManagedByLabelValue,
+ irohDNSClaimedByUIDLabel: testConnectorUID,
+ irohDNSConnectorClusterLabel: testClusterNameEncoded,
+ irohDNSConnectorNamespaceLabel: conn.Namespace,
+ irohDNSConnectorNameLabel: conn.Name,
+ } {
+ if drs.Labels[k] != v {
+ t.Errorf("label %q = %q, want %q", k, drs.Labels[k], v)
+ }
+ }
+}
+
+// TestEncodeDecodeIrohClusterLabel verifies that cluster names — including
+// the empty string and names containing slashes, which are not legal in
+// Kubernetes label values — survive an encode/decode round trip unchanged.
+func TestEncodeDecodeIrohClusterLabel(t *testing.T) {
+ tests := []string{
+ "",
+ "/test-project-staging",
+ "/zachs-project-z5pegw",
+ "plain-no-slashes",
+ "/with/multiple/slashes",
+ }
+ for _, want := range tests {
+ t.Run(want, func(t *testing.T) {
+ got := decodeIrohClusterLabel(encodeIrohClusterLabel(want))
+ if got != want {
+ t.Errorf("round-trip mismatch: encode(%q) -> decode = %q", want, got)
+ }
+ })
+ }
+}
+
+// TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry: a Connector that
+// reports only a home relay (no direct addresses) publishes exactly one
+// TXT entry — the relay — with no addr= entries.
+func TestBuildDesiredRecordSet_RelayOnlyOmitsAddrEntry(t *testing.T) {
+ r := newReconciler()
+ conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{
+ Id: testEndpointHex,
+ HomeRelay: testRelayURL,
+ })
+
+ drs, _, err := r.buildDesiredRecordSet(testClusterName, conn)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(drs.Spec.Records) != 1 {
+ t.Fatalf("Records count = %d, want 1 (relay only)", len(drs.Spec.Records))
+ }
+ if drs.Spec.Records[0].TXT.Content != "relay=https://relay.example.com" {
+ t.Errorf("Content = %q", drs.Spec.Records[0].TXT.Content)
+ }
+}
+
+// TestBuildDesiredRecordSet_EmptySuffixPutsRecordsUnderZoneRoot: with
+// RecordSuffix unset the record name is just "_iroh.<z32 id>", i.e. the
+// discovery records sit directly under the zone root.
+func TestBuildDesiredRecordSet_EmptySuffixPutsRecordsUnderZoneRoot(t *testing.T) {
+ r := newReconciler()
+ r.Config.Connector.Iroh.RecordSuffix = ""
+ conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{
+ Id: testEndpointHex,
+ HomeRelay: testRelayURL,
+ })
+
+ drs, _, err := r.buildDesiredRecordSet(testClusterName, conn)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ want := "_iroh." 
+ testEndpointZ32
+ if drs.Spec.Records[0].Name != want {
+ t.Errorf("record name = %q, want %q (no trailing suffix)", drs.Spec.Records[0].Name, want)
+ }
+}
+
+// TestBuildDesiredRecordSet_InvalidEndpointId verifies that a malformed
+// (non-hex) endpoint id is rejected with an error rather than being
+// silently encoded into a bogus z32 name.
+func TestBuildDesiredRecordSet_InvalidEndpointId(t *testing.T) {
+ r := newReconciler()
+ conn := newConnector(&networkingv1alpha1.ConnectorConnectionDetailsPublicKey{
+ Id: "not-hex",
+ HomeRelay: testRelayURL,
+ })
+ if _, _, err := r.buildDesiredRecordSet(testClusterName, conn); err == nil {
+ t.Fatal("expected error for non-hex endpoint id, got nil")
+ }
+}
+
+// TestBuildDesiredRecordSet_TwoConnectorsSameKeyProduceSameName verifies the
+// load-bearing claim-based property: two distinct Connectors that share an
+// iroh keypair compute the same DNSRecordSet name. This is what lets the
+// claim-based reconciler dedupe them.
+func TestBuildDesiredRecordSet_TwoConnectorsSameKeyProduceSameName(t *testing.T) {
+ r := newReconciler()
+ pk := &networkingv1alpha1.ConnectorConnectionDetailsPublicKey{
+ Id: testEndpointHex,
+ HomeRelay: testRelayURL,
+ }
+
+ a := newConnector(pk)
+ a.UID = types.UID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")
+ a.Name = "edge-a"
+
+ b := newConnector(pk)
+ b.UID = types.UID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb")
+ b.Name = "edge-b"
+
+ drsA, _, err := r.buildDesiredRecordSet("cluster-a", a)
+ if err != nil {
+ t.Fatalf("buildDesiredRecordSet(a): %v", err)
+ }
+ drsB, _, err := r.buildDesiredRecordSet("cluster-b", b)
+ if err != nil {
+ t.Fatalf("buildDesiredRecordSet(b): %v", err)
+ }
+
+ if drsA.Name != drsB.Name {
+ t.Errorf("expected matching DNSRecordSet name, got A=%q B=%q", drsA.Name, drsB.Name)
+ }
+ // Each Connector still stamps its own claim and identity labels — the
+ // reconciler uses these to detect "is this DNSRecordSet mine". 
+ if drsA.Labels[irohDNSClaimedByUIDLabel] == drsB.Labels[irohDNSClaimedByUIDLabel] {
+ t.Errorf("expected distinct claim labels, got both = %q", drsA.Labels[irohDNSClaimedByUIDLabel])
+ }
+ if drsA.Labels[irohDNSConnectorClusterLabel] == drsB.Labels[irohDNSConnectorClusterLabel] {
+ t.Errorf("expected distinct cluster labels, got both = %q", drsA.Labels[irohDNSConnectorClusterLabel])
+ }
+}
+
+// TestSortIrohAddresses verifies the normalization contract for reported
+// addresses: the output order is deterministic regardless of input order
+// (in these fixtures, sorted by address then port), so TXT record contents
+// stay stable across reconciles.
+func TestSortIrohAddresses(t *testing.T) {
+ tests := []struct {
+ name string
+ addrs []networkingv1alpha1.PublicKeyConnectorAddress
+ want []networkingv1alpha1.PublicKeyConnectorAddress
+ }{
+ {name: "empty", addrs: nil, want: []networkingv1alpha1.PublicKeyConnectorAddress{}},
+ {
+ name: "single ipv4",
+ addrs: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}},
+ want: []networkingv1alpha1.PublicKeyConnectorAddress{{Address: testIPv4, Port: 8080}},
+ },
+ {
+ name: "input order is normalized — agent may report in any order",
+ addrs: []networkingv1alpha1.PublicKeyConnectorAddress{
+ {Address: testIPv6, Port: 9090},
+ {Address: testIPv4, Port: 8080},
+ },
+ want: []networkingv1alpha1.PublicKeyConnectorAddress{
+ {Address: testIPv4, Port: 8080},
+ {Address: testIPv6, Port: 9090},
+ },
+ },
+ {
+ name: "same address different ports — sorted by port",
+ addrs: []networkingv1alpha1.PublicKeyConnectorAddress{
+ {Address: testIPv4, Port: 9090},
+ {Address: testIPv4, Port: 8080},
+ },
+ want: []networkingv1alpha1.PublicKeyConnectorAddress{
+ {Address: testIPv4, Port: 8080},
+ {Address: testIPv4, Port: 9090},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := sortIrohAddresses(tt.addrs)
+ if len(got) != len(tt.want) {
+ t.Fatalf("len = %d, want %d", len(got), len(tt.want))
+ }
+ for i := range got {
+ if got[i] != tt.want[i] {
+ t.Errorf("sortIrohAddresses[%d] = %+v, want %+v", i, got[i], tt.want[i])
+ }
+ }
+ })
+ }
+}
+
+// TestSortIrohAddresses_DoesNotMutateInput ensures we don't reorder the
+// caller's slice 
— Connector.Status fields are shared with watchers and +
+// other reconciler passes.
+func TestSortIrohAddresses_DoesNotMutateInput(t *testing.T) {
+ original := []networkingv1alpha1.PublicKeyConnectorAddress{
+ {Address: testIPv6, Port: 9090},
+ {Address: testIPv4, Port: 8080},
+ }
+ want := []networkingv1alpha1.PublicKeyConnectorAddress{
+ {Address: testIPv6, Port: 9090},
+ {Address: testIPv4, Port: 8080},
+ }
+ _ = sortIrohAddresses(original)
+ for i := range want {
+ if original[i] != want[i] {
+ t.Fatalf("input was mutated at index %d: got %+v, want %+v", i, original[i], want[i])
+ }
+ }
+}