From 7b92a2f4b7d63749e672117d126f10b6c1251876 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Tue, 12 May 2026 05:56:59 -0400
Subject: [PATCH] docs: restore non-obvious comments after gocyclo refactor

Restore and improve comments that document hidden constraints, edge
cases, and design decisions across files affected by the gocyclo
complexity refactor in PR #8309.

Changes:
- Add missing requeue comment in AWS private link deletion flow
- Restore OAuth IDP conversion comments: GitLab OIDC requirement,
  Keystone ID, IBM Cloud special case, ID claim safety, challenge
  flow validation constraint
- Fix comment formatting (//Handle -> // Handle) in v2 OAuth IDP
- Add ARO-HCP OpenAPI spec link for Azure resource type constant
- Restore CAPI label propagation doc comment with TODO and JIRA link
  (HOSTEDCP-971) explaining why labels/taints bypass rolling upgrades
- Restore globalPS DaemonSet scheduling comment

Signed-off-by: Bryan Cox <brcox@redhat.com>

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cmd/cluster/core/create.go                    | 10 +++++++
 cmd/install/assets/hypershift_operator.go     |  3 +++
 cmd/install/install.go                        |  8 ++++++
 .../awsprivatelink_controller.go              |  6 +++++
 .../azureprivatelinkservice/controller.go     | 13 +++++++++
 .../hostedcontrolplane_controller.go          | 18 +++++++++++++
 .../hostedcontrolplane/oauth/idp_convert.go   |  9 +++++--
 .../v2/oauth/idp_convert.go                   |  2 +-
 .../controllers/resources/resources.go        | 11 ++++++++
 control-plane-operator/main.go                | 11 +++++++-
 .../snapshot_controller.go                    |  3 +++
 .../hostedcluster/etcd_recovery.go            |  3 +++
 .../hostedcluster/metrics/metrics.go          |  7 ++++-
 .../hostedcluster/network_policies.go         | 23 ++++++++++++++--
 .../hostedclustersizing_controller.go         | 14 +++++++++-
 .../controllers/nodepool/aws.go               | 10 ++++++-
 .../controllers/nodepool/capi.go              | 18 +++++++++++++
 .../controllers/nodepool/metrics/metrics.go   |  5 ++++
 .../controllers/platform/aws/controller.go    |  3 +++
 .../controllers/scheduler/aws/autoscaler.go   | 27 +++++++++++++++++++
 .../aws/dedicated_request_serving_nodes.go    |  9 +++++++
 hypershift-operator/main.go                   |  9 +++++++
 .../controllers/local_ignitionprovider.go     | 20 ++++++++++++++
 23 files changed, 233 insertions(+), 9 deletions(-)

diff --git a/cmd/cluster/core/create.go b/cmd/cluster/core/create.go
index 4c3ccc771e0..8296268bd02 100644
--- a/cmd/cluster/core/create.go
+++ b/cmd/cluster/core/create.go
@@ -355,6 +355,7 @@ func prototypeResources(ctx context.Context, opts *CreateOptions) (*resources, e
 }
 
 func resolveReleaseImage(ctx context.Context, opts *CreateOptions) error {
+	// allow client side defaulting when release image is empty but release stream is set.
 	if len(opts.ReleaseImage) != 0 || len(opts.ReleaseStream) == 0 {
 		return nil
 	}
@@ -386,6 +387,7 @@ func parseKeyValuePairs(items []string, kind string) (map[string]string, error)
 }
 
 func resolvePullSecret(opts *CreateOptions) ([]byte, error) {
+	// overrides if pullSecretFile is set
 	if len(opts.PullSecretFile) > 0 {
 		data, err := os.ReadFile(opts.PullSecretFile)
 		if err != nil {
@@ -430,6 +432,7 @@ func applyEtcdConfig(cluster *hyperv1.HostedCluster, opts *CreateOptions) error
 func applySSHKey(prototype *resources, opts *CreateOptions) error {
 	sshKey, sshPrivateKey := opts.PublicKey, opts.PrivateKey
 	var err error
+	// overrides secret if SSHKeyFile is set
 	if len(opts.SSHKeyFile) > 0 {
 		if opts.GenerateSSH {
 			return fmt.Errorf("--generate-ssh and --ssh-key cannot be specified together")
@@ -468,6 +471,8 @@ func applySSHKey(prototype *resources, opts *CreateOptions) error {
 }
 
 func applyPausedUntil(cluster *hyperv1.HostedCluster, opts *CreateOptions) error {
+	// validate pausedUntil value
+	// valid values are either "true" or RFC3339 format date
 	if len(opts.PausedUntil) == 0 || opts.PausedUntil == "true" {
 		return nil
 	}
@@ -725,14 +730,18 @@ func (opts *RawCreateOptions) Validate(ctx context.Context) (*ValidatedCreateOpt
 		return nil, err
 	}
 
+	// Validate HostedCluster name follows RFC1123 standard
+	// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names
 	errs := validation.IsDNS1123Label(opts.Name)
 	if len(errs) > 0 {
 		return nil, fmt.Errorf("HostedCluster name failed RFC1123 validation: %s", strings.Join(errs[:], " "))
 	}
 
+	// Validate HostedCluster with this name doesn't exist in the namespace
 	if err := opts.validateClusterExistence(ctx); err != nil {
 		return nil, err
 	}
+	// Validate multi-arch aspects
 	if err := opts.validateArchAndFeatureSet(); err != nil {
 		return nil, err
 	}
@@ -804,6 +813,7 @@ func (opts *RawCreateOptions) validateArchAndFeatureSet() error {
 		return fmt.Errorf("specified arch %q is not supported", opts.Arch)
 	}
 
+	// Validate feature set is "", TechPreviewNoUpgrade, or DevPreviewNoUpgrade
 	switch opts.FeatureSet {
 	case string(configv1.Default), string(configv1.TechPreviewNoUpgrade), string(configv1.DevPreviewNoUpgrade):
 	case string(configv1.CustomNoUpgrade):
diff --git a/cmd/install/assets/hypershift_operator.go b/cmd/install/assets/hypershift_operator.go
index 592531ff749..d2af9802030 100644
--- a/cmd/install/assets/hypershift_operator.go
+++ b/cmd/install/assets/hypershift_operator.go
@@ -993,6 +993,9 @@ func (o HyperShiftOperatorDeployment) addAzurePlatformResources(envVars *[]corev
 	if o.AzurePLSResourceGroup != "" {
 		*envVars = append(*envVars, corev1.EnvVar{Name: "AZURE_RESOURCE_GROUP", Value: o.AzurePLSResourceGroup})
 	}
+	// Workload identity mode: the SA annotation triggers Azure AD Workload Identity
+	// webhook to inject federated tokens. Set the client ID as an env var so the
+	// HO platform controller can construct credentials.
 	if o.AzurePLSManagedIdentityClientID != "" {
 		*envVars = append(*envVars, corev1.EnvVar{Name: "AZURE_PLS_CLIENT_ID", Value: o.AzurePLSManagedIdentityClientID})
 		if o.AzurePLSSubscriptionID != "" {
diff --git a/cmd/install/install.go b/cmd/install/install.go
index ed798ea8f4f..0f91fcedbd0 100644
--- a/cmd/install/install.go
+++ b/cmd/install/install.go
@@ -185,6 +185,8 @@ func (o *Options) validatePlatformConfig() []error {
 			errs = append(errs, fmt.Errorf("--aws-private-region and --aws-private-creds or --aws-private-secret are required with --private-platform=%s", hyperv1.AWSPlatform))
 		}
 	case hyperv1.GCPPlatform:
+		// GCP uses Workload Identity Federation, no credentials required.
+		// However, --gcp-project and --gcp-region must be set together.
 		if (o.GCPProject == "") != (o.GCPRegion == "") {
 			errs = append(errs, fmt.Errorf("--gcp-project and --gcp-region must be set together when --private-platform=%s", hyperv1.GCPPlatform))
 		}
@@ -242,6 +244,7 @@ func (o *Options) validateExternalDNSConfig() []error {
 		return nil
 	}
 	var errs []error
+	// Credentials are optional for GCP when using Workload Identity
 	credentialsRequired := o.ExternalDNSProvider != "google"
 	if credentialsRequired && len(o.ExternalDNSCredentials) == 0 && len(o.ExternalDNSCredentialsSecret) == 0 {
 		errs = append(errs, fmt.Errorf("--external-dns-credentials or --external-dns-credentials-secret are required with --external-dns-provider"))
@@ -282,20 +285,24 @@ func (o *Options) validateImageConfig() []error {
 	return errs
 }
 
+// Validate scale-from-zero credentials
 func (o *Options) validateScaleFromZeroConfig() []error {
 	if len(o.ScaleFromZeroCreds) == 0 && len(o.ScaleFromZeroCredentialsSecret) == 0 {
 		return nil
 	}
 	var errs []error
 	supportedProviders := set.New("aws")
+	// Check mutual exclusivity - only one of file or secret should be provided
 	if len(o.ScaleFromZeroCreds) != 0 && len(o.ScaleFromZeroCredentialsSecret) != 0 {
 		errs = append(errs, fmt.Errorf("only one of --scale-from-zero-creds or --scale-from-zero-secret is supported"))
 	}
+	// Provider is required when using scale-from-zero credentials
 	if len(o.ScaleFromZeroProvider) == 0 {
 		errs = append(errs, fmt.Errorf("--scale-from-zero-provider is required when using scale-from-zero credentials"))
 	} else if !supportedProviders.Has(o.ScaleFromZeroProvider) {
 		errs = append(errs, fmt.Errorf("invalid --scale-from-zero-provider: %s (must be one of: %v)", o.ScaleFromZeroProvider, supportedProviders.UnsortedList()))
 	}
+	// Validate credentials file exists and is accessible if provided
 	if len(o.ScaleFromZeroCreds) > 0 {
 		if _, err := os.Stat(o.ScaleFromZeroCreds); err != nil {
 			if os.IsNotExist(err) {
@@ -324,6 +331,7 @@ func (o *Options) validateMiscConfig() []error {
 	if len(o.ManagedService) > 0 && o.ManagedService != hyperv1.AroHCP {
 		errs = append(errs, fmt.Errorf("not a valid managed service type: %s", o.ManagedService))
 	}
+	// Validate all the platforms in the list are valid
 	for _, platform := range o.PlatformsToInstall {
 		platformToCheck := strings.ToLower(platform)
 		if !ValidPlatforms.Has(platformToCheck) {
diff --git a/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go b/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go
index 696a42bc817..8ba562ccb3b 100644
--- a/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go
+++ b/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go
@@ -677,8 +677,10 @@ func deleteEndpointIfWrongService(ctx context.Context, ec2Client awsapi.EC2API,
 }
 
 func modifyEndpointIfNeeded(ctx context.Context, ec2Client awsapi.EC2API, awsEndpointService *hyperv1.AWSEndpointService, endpoint ec2types.VpcEndpoint, endpointID string, log logr.Logger) error {
+	// Ensure endpoint has the right subnets.
 	addedSubnet, removedSubnet := diffIDs(awsEndpointService.Spec.SubnetIDs, endpoint.SubnetIds)
 
+	// Ensure endpoint has the right SG.
 	existingSG := make([]string, 0)
 	for _, group := range endpoint.Groups {
 		existingSG = append(existingSG, aws.ToString(group.GroupId))
@@ -816,6 +818,8 @@ func (r *AWSEndpointServiceReconciler) reconcileExternalNameServices(ctx context
 	isPublic := netutil.IsPublicHCP(hcp)
 	externalNames := hcpExternalNames(hcp)
 
+	// only if not public and external names are configured, create services of type ExternalName so external-dns
+	// can create records for them
 	if !isPublic && len(externalNames) > 0 {
 		var errs []error
 		for svcType, externalName := range externalNames {
@@ -839,6 +843,7 @@ func (r *AWSEndpointServiceReconciler) reconcileExternalNameServices(ctx context
 		return nil
 	}
 
+	// if the cluster is public, ensure that any ExternalName services are removed
 	privateExternalServices := &corev1.ServiceList{}
 	if err := r.List(ctx, privateExternalServices, client.InNamespace(hcp.Namespace), client.HasLabels{externalPrivateServiceLabel}); err != nil {
 		return fmt.Errorf("cannot list private external services: %w", err)
@@ -1106,6 +1111,7 @@ func (r *AWSEndpointServiceReconciler) delete(ctx context.Context, awsEndpointSe
 		}
 
 		if output != nil && len(output.VpcEndpoints) != 0 {
+			// Once the VPC Endpoint is deleted, we need to return an error to reexecute the reconciliation
 			return false, fmt.Errorf("resource requested for deletion but still present")
 		}
 
diff --git a/control-plane-operator/controllers/azureprivatelinkservice/controller.go b/control-plane-operator/controllers/azureprivatelinkservice/controller.go
index 13cd2255fdc..1c75c9a2529 100644
--- a/control-plane-operator/controllers/azureprivatelinkservice/controller.go
+++ b/control-plane-operator/controllers/azureprivatelinkservice/controller.go
@@ -860,6 +860,9 @@ func (r *AzurePrivateLinkServiceReconciler) handleAzureError(ctx context.Context
 // CPO finalizer has completed PE cleanup and removed its finalizer from the CR.
 func (r *AzurePrivateLinkServiceReconciler) reconcileDelete(ctx context.Context, azPLS *hyperv1.AzurePrivateLinkService, log logr.Logger) error {
 	resourceGroup := azPLS.Spec.ResourceGroupName
+	// Delete DNS resources using the zone name persisted in status.
+	// This avoids a dependency on the HostedControlPlane during deletion, which may
+	// already be torn down or unavailable when the finalizer runs.
 	dnsZoneName := azPLS.Status.DNSZoneName
 
 	if dnsZoneName != "" {
@@ -961,6 +964,9 @@ func (r *AzurePrivateLinkServiceReconciler) deleteBaseDomainResources(ctx contex
 		return err
 	}
 
+	// Only delete the base domain DNS zone if no other CRs share it.
+	// When multiple CRs (e.g., private-router and oauth-openshift) use the same
+	// base domain zone, the zone must not be deleted until the last CR is removed.
 	hasSiblings, err := r.hasSiblingCR(ctx, azPLS)
 	if err != nil {
 		return fmt.Errorf("failed to check for sibling CRs during base domain zone cleanup: %w", err)
@@ -988,6 +994,9 @@ func (r *AzurePrivateLinkServiceReconciler) deleteBaseDomainARecords(ctx context
 	var baseDomainRecords []string
 	if azPLS.Name == privateRouterCRName {
 		baseDomainRecords = append(baseDomainRecords, kasBaseDomainRecordPrefix+clusterName)
+		// Only delete the oauth record if there is no sibling OAuth CR that
+		// owns it. This prevents the private-router deletion from removing
+		// an oauth record that now belongs to the dedicated OAuth CR.
 		hasSiblings, err := r.hasSiblingCR(ctx, azPLS)
 		if err != nil {
 			return fmt.Errorf("failed to check for sibling CRs during base domain cleanup: %w", err)
@@ -1013,6 +1022,10 @@ func (r *AzurePrivateLinkServiceReconciler) deleteBaseDomainARecords(ctx context
 	return nil
 }
 
+// deletePrivateEndpoint always attempts deletion by deterministic name, even
+// when PrivateEndpointID is empty. If the status was never populated (e.g.,
+// status update failed after PE creation), relying solely on PrivateEndpointID
+// would orphan the PE in the customer's subscription.
 func (r *AzurePrivateLinkServiceReconciler) deletePrivateEndpoint(ctx context.Context, resourceGroup, crName, statusPEID string, log logr.Logger) error {
 	peName := privateEndpointName(crName)
 	log.Info("Deleting Private Endpoint", "name", peName, "hasStatusID", statusPEID != "")
diff --git a/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go b/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go
index 017ac10a8d3..d21ee116ffd 100644
--- a/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go
+++ b/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go
@@ -502,6 +502,7 @@ func (r *HostedControlPlaneReconciler) reconcileKASStatus(ctx context.Context, h
 			return fmt.Errorf("failed to fetch Kube APIServer deployment %s/%s: %w", deployment.Namespace, deployment.Name, err)
 		}
 	} else {
+		// Assume the deployment is unavailable until proven otherwise.
 		newCondition = metav1.Condition{
 			Type:   string(hyperv1.KubeAPIServerAvailable),
 			Status: metav1.ConditionFalse,
@@ -670,6 +671,8 @@ func (r *HostedControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.R
 		return reconcile.Result{}, fmt.Errorf("failed to look up release image metadata: %w", err)
 	}
 
+	// This runs after LookupReleaseImage so we can use the version and resolved
+	// digest from the release image metadata.
 	if err := r.reconcileControlPlaneVersionStatus(ctx, hostedControlPlane, originalHostedControlPlane, releaseImage); err != nil {
 		return reconcile.Result{}, err
 	}
@@ -847,6 +850,8 @@ func (r *HostedControlPlaneReconciler) reconcileControlPlaneVersionStatus(ctx co
 	if err := r.Client.Get(ctx, client.ObjectKeyFromObject(pullSecret), pullSecret); err != nil {
 		return fmt.Errorf("failed to get pull secret for version reconciliation: %w", err)
 	}
+	// Resolve the release image to its digest so controlPlaneVersion records
+	// the immutable image reference, consistent with how CVO records images.
 	_, resolvedRef, err := r.ImageMetadataProvider.GetDigest(ctx, util.HCPControlPlaneReleaseImage(hostedControlPlane), pullSecret.Data[corev1.DockerConfigJsonKey])
 	if err != nil {
 		return fmt.Errorf("failed to resolve control plane release image digest: %w", err)
@@ -855,6 +860,9 @@ func (r *HostedControlPlaneReconciler) reconcileControlPlaneVersionStatus(ctx co
 
 	componentsList := &hyperv1.ControlPlaneComponentList{}
 	if listErr := r.Client.List(ctx, componentsList, client.InNamespace(hostedControlPlane.Namespace)); listErr != nil {
+		// On list failure, ensure a Partial entry exists so consumers
+		// know an upgrade was attempted. Preserve observedGeneration.
+		// Persist the Partial entry before returning the error.
 		hostedControlPlane.Status.ControlPlaneVersion = ensureControlPlaneVersionPartial(hostedControlPlane, clk, releaseImage.Version(), resolvedImage)
 		if patchErr := r.Client.Status().Patch(ctx, hostedControlPlane, client.MergeFromWithOptions(originalHostedControlPlane, client.MergeFromWithOptimisticLock{})); patchErr != nil {
 			return fmt.Errorf("failed to patch status after component list failure: %w (list error: %v)", patchErr, listErr)
@@ -1590,6 +1598,10 @@ func (r *HostedControlPlaneReconciler) reconcileOLMAndMiscCerts(ctx context.Cont
 }
 
 func (r *HostedControlPlaneReconciler) reconcileNetworkServingCerts(ctx context.Context, hcp *hyperv1.HostedControlPlane, p *pki.PKIParams, createOrUpdate upsert.CreateOrUpdateFN, rootCASecret *corev1.Secret) error {
+	// For the Multus Admission Controller, Network Node Identity, and OVN Control Plane Metrics Serving Certs:
+	//   We want to remove the secret if there was an existing one created by service-ca; otherwise, it will cause
+	//   issues in cases where you are upgrading an older CPO prior to us adding the feature to reconcile the serving
+	//   cert secret ourselves.
 	if !netutil.IsDisableMultiNetwork(hcp) {
 		multusAdmissionControllerService := manifests.MultusAdmissionControllerService(hcp.Namespace)
 		if err := r.Get(ctx, client.ObjectKeyFromObject(multusAdmissionControllerService), multusAdmissionControllerService); err != nil {
@@ -1598,6 +1610,8 @@ func (r *HostedControlPlaneReconciler) reconcileNetworkServingCerts(ctx context.
 			}
 		}
 
+		// If the service doesn't have the service ca annotation, delete any previous secret with the annotation and
+		// reconcile the secret with our own rootCA; otherwise, skip reconciling the secret with our own rootCA.
 		if hasServiceCAAnnotation := doesServiceHaveServiceCAAnnotation(multusAdmissionControllerService); !hasServiceCAAnnotation {
 			multusAdmissionControllerServingCertSecret := manifests.MultusAdmissionControllerServingCert(hcp.Namespace)
 
@@ -1620,6 +1634,8 @@ func (r *HostedControlPlaneReconciler) reconcileNetworkServingCerts(ctx context.
 		}
 	}
 
+	// If the service doesn't have the service ca annotation, delete any previous secret with the annotation and
+	// reconcile the secret with our own rootCA; otherwise, skip reconciling the secret with our own rootCA.
 	if hasServiceCAAnnotation := doesServiceHaveServiceCAAnnotation(networkNodeIdentityService); !hasServiceCAAnnotation {
 		networkNodeIdentityServingCertSecret := manifests.NetworkNodeIdentityControllerServingCert(hcp.Namespace)
 
@@ -1641,6 +1657,8 @@ func (r *HostedControlPlaneReconciler) reconcileNetworkServingCerts(ctx context.
 		}
 	}
 
+	// If the service doesn't have the service ca annotation, delete any previous secret with the annotation and
+	// reconcile the secret with our own rootCA; otherwise, skip reconciling the secret with our own rootCA.
 	if hasServiceCAAnnotation := doesServiceHaveServiceCAAnnotation(ovnControlPlaneService); !hasServiceCAAnnotation {
 		ovnControlPlaneMetricsServingCertSecret := manifests.OVNControlPlaneMetricsServingCert(hcp.Namespace)
 
diff --git a/control-plane-operator/controllers/hostedcontrolplane/oauth/idp_convert.go b/control-plane-operator/controllers/hostedcontrolplane/oauth/idp_convert.go
index 4aeb258be96..5704c079812 100644
--- a/control-plane-operator/controllers/hostedcontrolplane/oauth/idp_convert.go
+++ b/control-plane-operator/controllers/hostedcontrolplane/oauth/idp_convert.go
@@ -191,7 +191,7 @@ func convertGitLabIDP(providerConfig *configv1.IdentityProviderConfig, i int, id
 		ClientSecret: configv1.StringSource{StringSourceSpec: configv1.StringSourceSpec{
 			File: idpVolumeMounts.SecretPath(i, gitlabConfig.ClientSecret.Name, "client-secret", configv1.ClientSecretKey),
 		}},
-		Legacy: new(bool),
+		Legacy: new(bool), // we require OIDC for GitLab now
 	}
 	if gitlabConfig.CA.Name != "" {
 		provider.CA = idpVolumeMounts.ConfigMapPath(i, gitlabConfig.CA.Name, "ca", corev1.ServiceAccountRootCAKey)
@@ -235,7 +235,7 @@ func convertKeystoneIDP(providerConfig *configv1.IdentityProviderConfig, i int,
 		TypeMeta:             metav1.TypeMeta{Kind: "KeystonePasswordIdentityProvider", APIVersion: osinv1.GroupVersion.String()},
 		RemoteConnectionInfo: configv1.RemoteConnectionInfo{URL: keystoneConfig.URL},
 		DomainName:           keystoneConfig.DomainName,
-		UseKeystoneIdentity:  true,
+		UseKeystoneIdentity:  true, // force use of keystone ID
 	}
 	if keystoneConfig.CA.Name != "" {
 		provider.RemoteConnectionInfo.CA = idpVolumeMounts.ConfigMapPath(i, keystoneConfig.CA.Name, "ca", corev1.ServiceAccountRootCAKey)
@@ -291,6 +291,7 @@ func convertOpenIDIDP(ctx context.Context, providerConfig *configv1.IdentityProv
 		ExtraScopes:              openIDConfig.ExtraScopes,
 		ExtraAuthorizeParameters: openIDConfig.ExtraAuthorizeParameters,
 	}
+	// Handle special case for IBM Cloud's OIDC provider (need to override some fields not available in public api)
 	if configOverride != nil {
 		openIDProvider.URLs = configOverride.URLs
 		openIDProvider.Claims = configOverride.Claims
@@ -308,6 +309,7 @@ func convertOpenIDIDP(ctx context.Context, providerConfig *configv1.IdentityProv
 			}
 		}
 		openIDProvider.Claims = osinv1.OpenIDClaims{
+			// There is no longer a user-facing setting for ID as it is considered unsafe
 			ID:                []string{configv1.UserIDClaim},
 			PreferredUsername: openIDConfig.Claims.PreferredUsername,
 			Name:              openIDConfig.Claims.Name,
@@ -322,6 +324,9 @@ func convertOpenIDIDP(ctx context.Context, providerConfig *configv1.IdentityProv
 	if configOverride != nil && configOverride.Challenge != nil {
 		data.challenge = *configOverride.Challenge
 	} else {
+		// openshift CR validating in kube-apiserver does not allow
+		// challenge-redirecting IdPs to be configured with OIDC so it is safe
+		// to allow challenge-issuing flow if it's available on the OIDC side
 		challengeFlowsAllowed, err := checkOIDCPasswordGrantFlow(ctx, kclient, openIDProvider.URLs.Token, openIDConfig.ClientID, namespace, openIDConfig.CA, openIDConfig.ClientSecret, skipKonnectivityDialer)
 		if err != nil {
 			return nil, fmt.Errorf("error attempting password grant flow: %v", err)
diff --git a/control-plane-operator/controllers/hostedcontrolplane/v2/oauth/idp_convert.go b/control-plane-operator/controllers/hostedcontrolplane/v2/oauth/idp_convert.go
index d4e12f63669..4f5f85944c9 100644
--- a/control-plane-operator/controllers/hostedcontrolplane/v2/oauth/idp_convert.go
+++ b/control-plane-operator/controllers/hostedcontrolplane/v2/oauth/idp_convert.go
@@ -381,7 +381,7 @@ func convertOpenIDIDP(
 		ExtraScopes:              openIDConfig.ExtraScopes,
 		ExtraAuthorizeParameters: openIDConfig.ExtraAuthorizeParameters,
 	}
-	//Handle special case for IBM Cloud's OIDC provider (need to override some fields not available in public api)
+	// Handle special case for IBM Cloud's OIDC provider (need to override some fields not available in public api)
 	if configOverride != nil {
 		openIDProvider.URLs = configOverride.URLs
 		openIDProvider.Claims = configOverride.Claims
diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go
index c929a3a9e1c..1ee6a3a7180 100644
--- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go
+++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go
@@ -504,6 +504,7 @@ func (r *reconciler) cleanupLegacyResources(ctx context.Context, log logr.Logger
 }
 
 func (r *reconciler) reconcileDeletion(ctx context.Context, log logr.Logger, hcp *hyperv1.HostedControlPlane) (ctrl.Result, error) {
+	// Delete admission policies during cluster deletion to allow HCCO cleanup operations for ARO HCP
 	if hcp.Spec.Platform.Type == hyperv1.AzurePlatform {
 		registryConfigManagementStateAdmissionPolicy := registry.AdmissionPolicy{Name: registry.AdmissionPolicyNameManagementState}
 		log.Info("Cluster is being deleted, deleting registry management state admission policy and binding to allow cleanup")
@@ -607,6 +608,10 @@ func (r *reconciler) reconcileRegistryAndIngress(ctx context.Context, hcp *hyper
 	}
 
 	if capabilities.IsImageRegistryCapabilityEnabled(hcp.Spec.Capabilities) {
+		// For platforms where cluster-image-registry-operator (CIRO) needs a PVC to be created, bootstrap needs to happen
+		// in CIRO before the registry config is created. For now, this is the case for the OpenStack platform.
+		// If the object exist, we reconcile the registry config for other fields as it should be fine since the PVC would
+		// exist at this point.
 		if imageRegistryPlatformWithPVC(hcp.Spec.Platform.Type) && (!registryConfigExists || registryConfig == nil) {
 			log.Info("skipping registry config to let CIRO bootstrap")
 		} else {
@@ -623,6 +628,7 @@ func (r *reconciler) reconcileRegistryAndIngress(ctx context.Context, hcp *hyper
 				errs = append(errs, fmt.Errorf("failed to reconcile imageregistry config: %w", err))
 			}
 
+			// TODO: remove this when ROSA HCP stops setting the managementState to Removed to disable the Image Registry
 			if registryConfig.Spec.ManagementState == operatorv1.Removed && r.platformType != hyperv1.IBMCloudPlatform && r.platformType != hyperv1.AzurePlatform {
 				log.Info("imageregistry operator managementstate is removed, disabling openshift-controller-manager controllers and cleaning up resources")
 				ocmConfigMap := cpomanifests.OpenShiftControllerManagerConfig(r.hcpNamespace)
@@ -829,10 +835,13 @@ func (r *reconciler) reconcileNetworkingAndSecrets(ctx context.Context, hcp *hyp
 	}); err != nil {
 		errs = append(errs, fmt.Errorf("failed to reconcile network operator: %w", err))
 	}
+	// Detect suboptimal MTU size on kubevirt hosted cluster with ovn-k and raise a condition in such a case
 	if err := networkoperator.DetectSuboptimalMTU(ctx, r.cpClient, networkOperator, hcp); err != nil {
 		errs = append(errs, err)
 	}
 
+	// this allows users to disable data collection in sensitive environments
+	// solves https://issues.redhat.com/browse/OCPBUGS-12208
 	ensureExistsReconciliationStrategy := false
 	if _, exists := hcp.Annotations[hyperv1.EnsureExistsPullSecretReconciliation]; exists {
 		ensureExistsReconciliationStrategy = true
@@ -1819,11 +1828,13 @@ func (r *reconciler) reconcileControlPlaneConnectionAvailable(ctx context.Contex
 	cm := manifests.KASConnectionCheckerConfigMap()
 	if err := r.client.Get(ctx, client.ObjectKeyFromObject(cm), cm); err != nil {
 		if apierrors.IsNotFound(err) {
+			// CPO has not created the configmap yet, wait for create
 			condition.Reason = hyperv1.ControlPlaneConnectionConfigMapNotFoundReason
 			condition.Message = fmt.Sprintf("Connectivity check ConfigMap %s/%s not found; the hosted cluster config operator may not have reconciled it yet",
 				manifests.KASConnectionCheckerNamespace, manifests.KASConnectionCheckerConfigMapName)
 			return r.patchHCPStatusCondition(ctx, hcp, condition)
 		}
+		// This should not happen as we are started by the CPO after the configmap should be created
 		condition.Reason = hyperv1.ReconcileErrorReason
 		condition.Message = fmt.Sprintf("Failed to get connectivity check ConfigMap %s/%s: %v",
 			manifests.KASConnectionCheckerNamespace, manifests.KASConnectionCheckerConfigMapName, err)
diff --git a/control-plane-operator/main.go b/control-plane-operator/main.go
index 2e2a2912aba..6fde842dde1 100644
--- a/control-plane-operator/main.go
+++ b/control-plane-operator/main.go
@@ -187,6 +187,10 @@ func defaultCommand() *cobra.Command {
 	return cmd
 }
 
+// For now, since the hosted cluster config operator is treated like any other
+// release payload component but isn't actually part of a release payload,
+// enable the user to specify an image directly as a flag, and otherwise
+// try and detect the control plane operator's image to use instead.
 func newOperatorImageLookup(mgr ctrl.Manager, namespace, deploymentName string) func(string) (string, error) {
 	return func(userSpecifiedImage string) (string, error) {
 		if len(userSpecifiedImage) > 0 {
@@ -201,6 +205,7 @@ func newOperatorImageLookup(mgr ctrl.Manager, namespace, deploymentName string)
 			}
 
 			for _, container := range me.Spec.Containers {
+				// If CPO container image is a sha256 reference, use it
 				if container.Name == "control-plane-operator" {
 					if strings.Contains(container.Image, "@sha256:") {
 						return container.Image, nil
@@ -208,6 +213,7 @@ func newOperatorImageLookup(mgr ctrl.Manager, namespace, deploymentName string)
 				}
 			}
 
+			// Use the container status to make sure we get the sha256 reference
 			for _, container := range me.Status.ContainerStatuses {
 				if container.Name == "control-plane-operator" {
 					image := strings.TrimPrefix(container.ImageID, "docker-pullable://")
@@ -289,14 +295,16 @@ func buildReleaseProviders(componentImages map[string]string, registryOverrides
 		ComponentImages: componentImages,
 	}
 
+	// It should be used to lookup spec.releaseImage.
 	userReleaseProvider := &releaseinfo.ProviderWithOpenShiftImageRegistryOverridesDecorator{
 		Delegate: &releaseinfo.RegistryMirrorProviderDecorator{
 			Delegate:          coreReleaseProvider,
-			RegistryOverrides: nil,
+			RegistryOverrides: nil, // UserReleaseProvider shouldn't include registry overrides as they should not get propagated to the data plane.
 		},
 		OpenShiftImageRegistryOverrides: imageRegistryOverrides,
 	}
 
+	// It should be used to lookup spec.controlPlaneReleaseImage.
 	cpReleaseProvider := &releaseinfo.ProviderWithOpenShiftImageRegistryOverridesDecorator{
 		Delegate: &releaseinfo.RegistryMirrorProviderDecorator{
 			Delegate:          coreReleaseProvider,
@@ -445,6 +453,7 @@ func NewStartCommand() *cobra.Command {
 			if err != nil {
 				return false, err
 			}
+			// Apparently this is occasionally set to an empty string
 			if hostedClusterConfigOperatorImage == "" {
 				setupLog.Info("hosted cluster config operator image is empty, retrying")
 				return false, nil
diff --git a/hypershift-operator/controllers/auditlogpersistence/snapshot_controller.go b/hypershift-operator/controllers/auditlogpersistence/snapshot_controller.go
index 9bb17347941..7d0d6a38832 100644
--- a/hypershift-operator/controllers/auditlogpersistence/snapshot_controller.go
+++ b/hypershift-operator/controllers/auditlogpersistence/snapshot_controller.go
@@ -76,6 +76,7 @@ func (r *SnapshotReconciler) getSnapshotConfig(ctx context.Context) (*auditlogpe
 		return nil, fmt.Errorf("failed to get AuditLogPersistenceConfig: %w", err)
 	}
 
+	// Apply defaults to a copy of the spec to avoid modifying the original
 	spec := config.Spec.DeepCopy()
 	ApplyDefaults(spec)
 
@@ -97,6 +98,7 @@ func (r *SnapshotReconciler) getLastObservedRestartCount(ctx context.Context, po
 		if podCopy.Annotations == nil {
 			podCopy.Annotations = make(map[string]string)
 		}
+		// Reset corrupted annotation to 0
 		podCopy.Annotations[lastObservedRestartCountAnnotation] = "0"
 		if patchErr := r.client.Patch(ctx, podCopy, client.MergeFrom(pod)); patchErr != nil {
 			log.Error(patchErr, "Failed to reset corrupted annotation")
@@ -118,6 +120,7 @@ func (r *SnapshotReconciler) checkSnapshotInterval(ctx context.Context, pod *cor
 		if podCopy.Annotations == nil {
 			podCopy.Annotations = make(map[string]string)
 		}
+		// Remove corrupted annotation - it will be set correctly after snapshot creation
 		delete(podCopy.Annotations, lastSnapshotTimeAnnotation)
 		if patchErr := r.client.Patch(ctx, podCopy, client.MergeFrom(pod)); patchErr != nil {
 			log.Error(patchErr, "Failed to remove corrupted last snapshot time annotation")
diff --git a/hypershift-operator/controllers/hostedcluster/etcd_recovery.go b/hypershift-operator/controllers/hostedcluster/etcd_recovery.go
index d4196de80b6..3d67b56e52f 100644
--- a/hypershift-operator/controllers/hostedcluster/etcd_recovery.go
+++ b/hypershift-operator/controllers/hostedcluster/etcd_recovery.go
@@ -88,6 +88,8 @@ func (r *HostedClusterReconciler) handleExistingEtcdRecoveryJob(ctx context.Cont
 	return false, nil
 }
 
+// Creating the condition for the first time or in the case of the ETCD fails intermittently.
+// If the ETCD keeps failing and recovering, we can see the hcluster.Generation increasing indefinitely.
 func (r *HostedClusterReconciler) setEtcdRecoveryCondition(ctx context.Context, hcluster *hyperv1.HostedCluster, status metav1.ConditionStatus, reason, message string) error {
 	condition := metav1.Condition{
 		Type:               string(hyperv1.EtcdRecoveryActive),
@@ -128,6 +130,7 @@ func (r *HostedClusterReconciler) detectAndTriggerEtcdRecovery(ctx context.Conte
 	}
 
 	if failingEtcdPod == nil {
+		// However, if the statefulset is not reporting fully available, check later
 		if !fullyAvailable {
 			return &requeueAfter, nil
 		}
diff --git a/hypershift-operator/controllers/hostedcluster/metrics/metrics.go b/hypershift-operator/controllers/hostedcluster/metrics/metrics.go
index 8c26a7bb0f4..898d55fa423 100644
--- a/hypershift-operator/controllers/hostedcluster/metrics/metrics.go
+++ b/hypershift-operator/controllers/hostedcluster/metrics/metrics.go
@@ -91,7 +91,8 @@ const (
 
 	HostedClusterManagedAzureInfoMetricName = "hosted_cluster_managed_azure_info"
 	HostedClusterManagedAzureInfoMetricHelp = "Reports Azure managed (ARO) specific information about the given HostedCluster"
-	HostedClusterManagedAzureResourceType   = "hcpOpenShiftClusters"
+	// see https://github.com/Azure/ARO-HCP/blob/4134b5bb53782858047a0493f31b250c811eb84c/api/redhatopenshift/resource-manager/Microsoft.RedHatOpenShift/hcpclusters/preview/2024-06-10-preview/openapi.json#L131
+	HostedClusterManagedAzureResourceType = "hcpOpenShiftClusters"
 
 	HostedClusterAzureInfoMetricName = "hosted_cluster_azure_info"
 	HostedClusterAzureInfoMetricHelp = "Reports Azure information about the given HostedCluster"
@@ -395,6 +396,7 @@ func collectInitialRollingOutMetric(ch chan<- prometheus.Metric, clk clock.Clock
 }
 
 func collectUpgradingDurationMetric(ch chan<- prometheus.Metric, clk clock.Clock, hcluster *hyperv1.HostedCluster, hclusterLabelValues []string) {
+	// The upgrade is adding a new entry in the history on top of the initial rollout.
 	if hcluster.Status.Version == nil || len(hcluster.Status.Version.History) <= 1 {
 		return
 	}
@@ -463,6 +465,7 @@ func (c *hostedClustersMetricsCollector) collectProxyMetrics(ch chan<- prometheu
 }
 
 func (c *hostedClustersMetricsCollector) collectProxyCAMetrics(ch chan<- prometheus.Metric, hcluster *hyperv1.HostedCluster, hclusterLabelValues []string) {
+	// Only report CA validity if a CA is actually configured
 	proxyCAValid := 0.0
 	validProxyCondition := meta.FindStatusCondition(hcluster.Status.Conditions, string(hyperv1.ValidProxyConfiguration))
 	if validProxyCondition != nil && validProxyCondition.Status == metav1.ConditionTrue {
@@ -475,6 +478,7 @@ func (c *hostedClustersMetricsCollector) collectProxyCAMetrics(ch chan<- prometh
 		hclusterLabelValues...,
 	)
 
+	// Silently skip expiry time if we can't fetch it
 	proxyExpiryTime := 0.0
 	expiryTime, err := c.expiryTimeProxyCA(hcluster)
 	if err == nil {
@@ -559,6 +563,7 @@ func collectAzureInfoMetrics(ch chan<- prometheus.Metric, hcluster *hyperv1.Host
 	}
 }
 
+// Use detailed credential status: 0=valid, 1=invalid, 2=unknown
 func collectAwsCredsMetric(ch chan<- prometheus.Metric, hcluster *hyperv1.HostedCluster, hclusterLabelValues []string) {
 	credStatus := platformaws.GetCredentialStatus(hcluster)
 	ch <- prometheus.MustNewConstMetric(
diff --git a/hypershift-operator/controllers/hostedcluster/network_policies.go b/hypershift-operator/controllers/hostedcluster/network_policies.go
index 9467de3b8da..f45a5ab6d04 100644
--- a/hypershift-operator/controllers/hostedcluster/network_policies.go
+++ b/hypershift-operator/controllers/hostedcluster/network_policies.go
@@ -101,6 +101,8 @@ func (r *HostedClusterReconciler) reconcileNetworkPolicies(ctx context.Context,
 }
 
 func (r *HostedClusterReconciler) reconcileIngressNetworkPolicy(ctx context.Context, createOrUpdate upsert.CreateOrUpdateFN, hcp *hyperv1.HostedControlPlane, controlPlaneNamespaceName string) error {
+	// Only needed when routes are served by the management cluster's default ingress controller,
+	// i.e., when routes are NOT labeled for the HCP router.
 	policy := networkpolicy.OpenshiftIngressNetworkPolicy(controlPlaneNamespaceName)
 	if !netutil.LabelHCPRoutes(hcp) {
 		if _, err := createOrUpdate(ctx, r.Client, policy, func() error {
@@ -328,6 +330,8 @@ func reconcilePrivateRouterNetworkPolicy(policy *networkingv1.NetworkPolicy, _ *
 			"app": "private-router",
 		},
 	}
+	// TODO: Network policy code should move to the control plane operator. For now,
+	// only setup ingress rules (and not egress rules) when version is < 4.14
 	if ingressOnly {
 		policy.Spec.PolicyTypes = []networkingv1.PolicyType{networkingv1.PolicyTypeIngress}
 		return nil
@@ -652,6 +656,15 @@ func reconcileVirtLauncherNetworkPolicy(log logr.Logger, policy *networkingv1.Ne
 	return buildVirtLauncherNetworkPolicyBase(log, policy, hcluster, blockedIPv4Networks, blockedIPv6Networks, controlPlanePeers)
 }
 
+// reconcileVirtLauncherNetworkPolicyExternalInfra builds the virt-launcher
+// NetworkPolicy for deployments where the KubeVirt VMs run on a separate
+// infrastructure cluster. Unlike the centralized variant, this omits egress
+// rules for control-plane pods (kube-apiserver, oauth, ignition-server-proxy)
+// because those pods reside on the management cluster and are reached via
+// external IPs already permitted by the broad 0.0.0.0/0 allow rule.
+// infraClusterNetwork may be nil when the infra kubeconfig lacks cluster-
+// scoped read access to networks.config.openshift.io. In that case the
+// policy is still created but without CIDR-based egress blocking.
 func reconcileVirtLauncherNetworkPolicyExternalInfra(log logr.Logger, policy *networkingv1.NetworkPolicy, hcluster *hyperv1.HostedCluster, infraClusterNetwork *configv1.Network) error {
 	blockedIPv4Networks := []string{}
 	blockedIPv6Networks := []string{}
@@ -667,8 +680,9 @@ func reconcileVirtLauncherNetworkPolicyExternalInfra(log logr.Logger, policy *ne
 	return buildVirtLauncherNetworkPolicyBase(log, policy, hcluster, blockedIPv4Networks, blockedIPv6Networks, nil)
 }
 
-// buildVirtLauncherNetworkPolicyBase constructs the virt-launcher
-// NetworkPolicy structure. extraEgressPeers are appended to the primary egress rule
+// buildVirtLauncherNetworkPolicyBase constructs the common virt-launcher
+// NetworkPolicy structure shared by both centralized and external infra
+// deployments. extraEgressPeers are appended to the primary egress rule
 // (e.g. control-plane pod selectors for centralized infra).
 func buildVirtLauncherNetworkPolicyBase(log logr.Logger, policy *networkingv1.NetworkPolicy, hcluster *hyperv1.HostedCluster, blockedIPv4Networks, blockedIPv6Networks []string, extraEgressPeers []networkingv1.NetworkPolicyPeer) error {
 	protocolTCP := corev1.ProtocolTCP
@@ -1039,6 +1053,11 @@ func (r *HostedClusterReconciler) reconcileExternalInfraVirtLauncherPolicy(ctx c
 	infraClient := kvInfraClient.GetInfraClient()
 	infraNamespace := kvInfraClient.GetInfraNamespace()
 
+	// networks.config.openshift.io is cluster-scoped, so the infra
+	// kubeconfig needs a ClusterRole with get permission on that
+	// resource. When the permission is missing we still create the
+	// NetworkPolicy but without CIDR-based egress blocking, and
+	// surface the RBAC gap as a condition on the HostedCluster.
 	infraClusterNetwork, err := fetchInfraClusterNetwork(ctx, infraClient, infraNamespace, hcluster, log)
 	if err != nil {
 		return err
diff --git a/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_controller.go b/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_controller.go
index 64983a9a73c..f13741230c6 100644
--- a/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_controller.go
+++ b/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_controller.go
@@ -180,6 +180,8 @@ func (r *reconciler) reconcile(
 	config *schedulingv1alpha1.ClusterSizingConfiguration, hostedCluster *hypershiftv1beta1.HostedCluster,
 ) (*action, error) {
 	if !isConfigValid(config) {
+		// we can't put clusters into t-shirt sizes unless we have a valid configuration;
+		// we'll re-trigger when the configuration object changes and can process clusters then
 		return nil, nil
 	}
 
@@ -200,6 +202,8 @@ func (r *reconciler) reconcile(
 
 	lastTransitionTime, lastSizeClass := previousTransitionFor(hostedCluster)
 	currentSizeClass, sizeClassLabelPresent := hostedCluster.ObjectMeta.Labels[hypershiftv1beta1.HostedClusterSizeLabel]
+	// we can't update both the status and the labels in one call, so when we have
+	// updated status but have not yet updated the labels, we just need to do that first
 	if lastTransitionTime != nil && !sizeClassLabelPresent || currentSizeClass != lastSizeClass {
 		return &action{
 			applyCfg: hypershiftv1beta1applyconfigurations.HostedCluster(hostedCluster.Name, hostedCluster.Namespace).
@@ -243,7 +247,7 @@ func (r *reconciler) determineSizeClass(
 			}
 		}
 		logger.Error(fmt.Errorf("could not find a size class for hosted cluster"), "no size can be set on hosted cluster")
-		return nil, nil
+		return nil, nil // user needs to reformat the field, returning error is useless
 	}
 
 	if autoScaling := hostedCluster.Annotations[hypershiftv1beta1.ResourceBasedControlPlaneAutoscalingAnnotation]; autoScaling == "true" {
@@ -272,6 +276,7 @@ func (r *reconciler) determineSizeClassFromAutoscaling(
 		}
 	}
 
+	// If no recommended size is set, or the recommended size wasn't found, use the first size class as fallback
 	sizeClass := &config.Spec.Sizes[0]
 	if recommendedSize == "" {
 		logger.Info("Resource-based autoscaling enabled but no recommended size set, using first size class", "defaultSize", sizeClass.Name)
@@ -349,6 +354,12 @@ func (r *reconciler) transitionSizeClass(
 	}
 	increasingSize := previousMinimumSize < sizeClass.Criteria.From
 
+	// For new clusters being added to the fleet, we have an SLA on creation time and can't
+	// afford to delay the first transition, as it is required for the control plane to schedule.
+	// For other clusters, we want to limit the amount of churn to promote management plane stability.
+	// third, we need to know if we're ready to transition the cluster:
+	// - the hosted cluster has limits to how quickly it can transition up and down, and
+	// - the management plane has limits to how many clusters can be transitioning at any time
 	if result := r.checkTransitionDelay(config, hostedCluster, sizeClass, increasingSize, lastTransitionTime); result != nil {
 		if result.applyCfg == nil {
 			return &action{requeueAfter: result.requeueAfter}, nil
@@ -394,6 +405,7 @@ func (r *reconciler) checkTransitionDelay(
 	sizeClass *schedulingv1alpha1.SizeConfiguration, increasingSize bool,
 	lastTransitionTime *time.Time,
 ) *action {
+	// if we transitioned in the past, we need to enforce the delay from there
 	delayStart := time.Time{}
 	if lastTransitionTime != nil {
 		delayStart = *lastTransitionTime
diff --git a/hypershift-operator/controllers/nodepool/aws.go b/hypershift-operator/controllers/nodepool/aws.go
index f1a847404e4..7f884700c4e 100644
--- a/hypershift-operator/controllers/nodepool/aws.go
+++ b/hypershift-operator/controllers/nodepool/aws.go
@@ -81,7 +81,7 @@ func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedClust
 
 	instanceMetadataOptions := &capiaws.InstanceMetadataOptions{
 		HTTPTokens:              capiaws.HTTPTokensStateOptional,
-		HTTPPutResponseHopLimit: 2,
+		HTTPPutResponseHopLimit: 2, // set to 2 as per AWS recommendation for container envs https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#imds-considerations
 		HTTPEndpoint:            capiaws.InstanceMetadataEndpointStateEnabled,
 		InstanceMetadataTags:    capiaws.InstanceMetadataEndpointStateDisabled,
 	}
@@ -119,11 +119,13 @@ func awsMachineTemplateSpec(infraName string, hostedCluster *hyperv1.HostedClust
 }
 
 func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodePool, releaseImage *releaseinfo.ReleaseImage) (string, error) {
+	// TODO: Should the region be included in the NodePool platform information?
 	region := hostedCluster.Spec.Platform.AWS.Region
 	arch := nodePool.Spec.Arch
 	if nodePool.Spec.Platform.AWS.AMI != "" {
 		return nodePool.Spec.Platform.AWS.AMI, nil
 	}
+	// Use Windows AMI mapping when ImageType is set to Windows
 	if nodePool.Spec.Platform.AWS.ImageType == hyperv1.ImageTypeWindows {
 		ami, err := getWindowsAMI(region, arch, releaseImage)
 		if err != nil {
@@ -131,6 +133,7 @@ func resolveAWSAMI(hostedCluster *hyperv1.HostedCluster, nodePool *hyperv1.NodeP
 		}
 		return ami, nil
 	}
+	// Default behavior for Linux/RHCOS AMIs
 	ami, err := defaultNodePoolAMI(region, arch, releaseImage)
 	if err != nil {
 		return "", fmt.Errorf("couldn't discover an AMI for release image: %w", err)
@@ -208,8 +211,10 @@ func applyAWSPlacementOptions(nodePool *hyperv1.NodePool, spec *capiaws.AWSMachi
 	}
 	spec.Template.Spec.Tenancy = placement.Tenancy
 
+	// Handle market type - placement.MarketType takes precedence over capacityReservation.MarketType (deprecated)
 	switch placement.MarketType {
 	case hyperv1.MarketTypeSpot:
+		// Spot instances
 		spec.Template.Spec.SpotMarketOptions = &capiaws.SpotMarketOptions{}
 		if placement.Spot.MaxPrice != "" {
 			spec.Template.Spec.SpotMarketOptions.MaxPrice = ptr.To(placement.Spot.MaxPrice)
@@ -219,6 +224,7 @@ func applyAWSPlacementOptions(nodePool *hyperv1.NodePool, spec *capiaws.AWSMachi
 	case hyperv1.MarketTypeOnDemand:
 		spec.Template.Spec.MarketType = capiaws.MarketTypeOnDemand
 	default:
+		// If placement.MarketType is not set, fall back to capacityReservation.MarketType (deprecated)
 		if capacityReservation := placement.CapacityReservation; capacityReservation != nil {
 			//nolint:staticcheck // SA1019: capacityReservation.MarketType is deprecated but supported for backward compatibility
 			switch capacityReservation.MarketType {
@@ -227,6 +233,7 @@ func applyAWSPlacementOptions(nodePool *hyperv1.NodePool, spec *capiaws.AWSMachi
 			case hyperv1.MarketTypeOnDemand:
 				spec.Template.Spec.MarketType = capiaws.MarketTypeOnDemand
 			default:
+				// if the tenancy is not host and the ID is set, default the market type to CapacityBlock
 				if placement.Tenancy != "host" && capacityReservation.ID != nil {
 					spec.Template.Spec.MarketType = capiaws.MarketTypeCapacityBlock
 				}
@@ -234,6 +241,7 @@ func applyAWSPlacementOptions(nodePool *hyperv1.NodePool, spec *capiaws.AWSMachi
 		}
 	}
 
+	// Handle capacity reservation options
 	if capacityReservation := placement.CapacityReservation; capacityReservation != nil {
 		spec.Template.Spec.CapacityReservationID = capacityReservation.ID
 		spec.Template.Spec.CapacityReservationPreference = capiaws.CapacityReservationPreference(capacityReservation.Preference)
diff --git a/hypershift-operator/controllers/nodepool/capi.go b/hypershift-operator/controllers/nodepool/capi.go
index b66ebcd1d68..f2965b8a555 100644
--- a/hypershift-operator/controllers/nodepool/capi.go
+++ b/hypershift-operator/controllers/nodepool/capi.go
@@ -410,6 +410,9 @@ func (c *CAPI) reconcileMachineDeployment(ctx context.Context, log logr.Logger,
 
 	c.setMachineDeploymentMetadata(machineDeployment, capiClusterName)
 
+	// Set defaults. These are normally set by the CAPI machinedeployment webhook.
+	// However, since we don't run the webhook, CAPI updates the machinedeployment
+	// after it has been created with defaults.
 	machineDeployment.Spec.MinReadySeconds = ptr.To[int32](0)
 	machineDeployment.Spec.ClusterName = capiClusterName
 	if machineDeployment.Spec.Selector.MatchLabels == nil {
@@ -429,6 +432,8 @@ func (c *CAPI) reconcileMachineDeployment(ctx context.Context, log logr.Logger,
 				resourcesName:           resourcesName,
 				capiv1.ClusterNameLabel: capiClusterName,
 			},
+			// Annotations here propagate down to Machines
+			// https://cluster-api.sigs.k8s.io/developer/architecture/controllers/metadata-propagation.html#machinedeployment.
 			Annotations: map[string]string{
 				nodePoolAnnotation:                       client.ObjectKeyFromObject(nodePool).String(),
 				hyperv1.NodePoolReleaseVersionAnnotation: c.Version(),
@@ -451,6 +456,7 @@ func (c *CAPI) reconcileMachineDeployment(ctx context.Context, log logr.Logger,
 		},
 	}
 
+	// This label must be on the MachineDeployment template so the spot MHC can select machines
 	if isSpotEnabled(nodePool) {
 		machineDeployment.Spec.Template.Labels[interruptibleInstanceLabel] = ""
 	}
@@ -494,11 +500,13 @@ func (c *CAPI) setMachineDeploymentMetadata(machineDeployment *capiv1.MachineDep
 }
 
 func setMachineDeploymentFailureDomain(nodePool *hyperv1.NodePool, machineDeployment *capiv1.MachineDeployment) {
+	// The CAPI provider for OpenStack uses the FailureDomain field to set the availability zone.
 	if nodePool.Spec.Platform.Type == hyperv1.OpenStackPlatform && nodePool.Spec.Platform.OpenStack != nil {
 		if nodePool.Spec.Platform.OpenStack.AvailabilityZone != "" {
 			machineDeployment.Spec.Template.Spec.FailureDomain = ptr.To(nodePool.Spec.Platform.OpenStack.AvailabilityZone)
 		}
 	}
+	// The CAPI provider for GCP uses the FailureDomain field to set the zone.
 	if nodePool.Spec.Platform.Type == hyperv1.GCPPlatform && nodePool.Spec.Platform.GCP != nil {
 		if nodePool.Spec.Platform.GCP.Zone != "" {
 			machineDeployment.Spec.Template.Spec.FailureDomain = ptr.To(nodePool.Spec.Platform.GCP.Zone)
@@ -506,6 +514,11 @@ func setMachineDeploymentFailureDomain(nodePool *hyperv1.NodePool, machineDeploy
 	}
 }
 
+// propagateLabelsAndTaintsToMachines propagates label/taints directly into Machines
+// to avoid a NodePool label/taints change triggering a rolling upgrade.
+// TODO(Alberto): drop this and rely on core in-place propagation once CAPI 1.4.0
+// https://github.com/kubernetes-sigs/cluster-api/releases comes through the payload.
+// https://issues.redhat.com/browse/HOSTEDCP-971
 func (c *CAPI) propagateLabelsAndTaintsToMachines(ctx context.Context, log logr.Logger, machineDeployment *capiv1.MachineDeployment) error {
 	nodePool := c.nodePool
 	machineList := &capiv1.MachineList{}
@@ -530,6 +543,9 @@ func (c *CAPI) propagateLabelsAndTaintsToMachines(ctx context.Context, log logr.
 				machine.Labels[labelKey] = v
 			}
 
+			// Propagate globalPS managed label to Machines so the HCCO Node controller
+			// applies it to Nodes. This enables the GlobalPullSecret DaemonSet to
+			// schedule on Replace nodes. Only AWS and Azure platforms support this.
 			if nodePool.Spec.Platform.Type == hyperv1.AWSPlatform || nodePool.Spec.Platform.Type == hyperv1.AzurePlatform {
 				globalPSLabelKey := fmt.Sprintf("%s.%s", labelManagedPrefix, globalPSNodeLabel)
 				machine.Labels[globalPSLabelKey] = "true"
@@ -594,6 +610,8 @@ func (c *CAPI) reconcileMachineDeploymentStatus(log logr.Logger, machineDeployme
 	targetConfigHash := c.HashWithoutVersion()
 	targetConfigVersionHash := c.Hash()
 
+	// If the MachineDeployment is now processing we know
+	// is at the expected version (spec.version) and config (userData Secret) so we reconcile status and annotation.
 	if MachineDeploymentComplete(machineDeployment) {
 		if nodePool.Status.Version != targetVersion {
 			log.Info("Version update complete",
diff --git a/hypershift-operator/controllers/nodepool/metrics/metrics.go b/hypershift-operator/controllers/nodepool/metrics/metrics.go
index d52b458db75..5283c729976 100644
--- a/hypershift-operator/controllers/nodepool/metrics/metrics.go
+++ b/hypershift-operator/controllers/nodepool/metrics/metrics.go
@@ -390,6 +390,7 @@ func (c *nodePoolsMetricsCollector) Collect(ch chan<- prometheus.Metric) {
 	c.lastCollectTime = currentCollectTime
 }
 
+// Hosted clusters loop
 func (c *nodePoolsMetricsCollector) collectHostedClusterData(ctx context.Context) map[string]*hclusterData {
 	hclusterPathToData := make(map[string]*hclusterData)
 	hclusters := &hyperv1.HostedClusterList{}
@@ -414,6 +415,7 @@ func (c *nodePoolsMetricsCollector) collectHostedClusterData(ctx context.Context
 	return hclusterPathToData
 }
 
+// Machine sets loop
 func (c *nodePoolsMetricsCollector) collectMachineSetReplicas(ctx context.Context) map[string]int32 {
 	result := make(map[string]int32)
 	machineSets := &capiv1.MachineSetList{}
@@ -422,11 +424,13 @@ func (c *nodePoolsMetricsCollector) collectMachineSetReplicas(ctx context.Contex
 	}
 	for k := range machineSets.Items {
 		machineSet := &machineSets.Items[k]
+		// we use machineSet.Spec.Replicas because nodePool.Spec.Replicas will not be set if autoscaling is enabled
 		result[machineSet.Namespace+"/"+machineSet.Name] = *machineSet.Spec.Replicas
 	}
 	return result
 }
 
+// Machine deployments loop
 func (c *nodePoolsMetricsCollector) collectMachineDeploymentReplicas(ctx context.Context) map[string]int32 {
 	result := make(map[string]int32)
 	machineDeployments := &capiv1.MachineDeploymentList{}
@@ -435,6 +439,7 @@ func (c *nodePoolsMetricsCollector) collectMachineDeploymentReplicas(ctx context
 	}
 	for k := range machineDeployments.Items {
 		md := &machineDeployments.Items[k]
+		// we use machineDeployment.Spec.Replicas because nodePool.Spec.Replicas will not be set if autoscaling is enabled
 		result[md.Namespace+"/"+md.Name] = *md.Spec.Replicas
 	}
 	return result
diff --git a/hypershift-operator/controllers/platform/aws/controller.go b/hypershift-operator/controllers/platform/aws/controller.go
index 7d69152e39a..e7779ba5a57 100644
--- a/hypershift-operator/controllers/platform/aws/controller.go
+++ b/hypershift-operator/controllers/platform/aws/controller.go
@@ -460,6 +460,7 @@ func listKarpenterSubnetIDs(ctx context.Context, c client.Client, namespace stri
 	return subnetIDs, nil
 }
 
+// If a previous awsendpointservice that points to an ingress controller exists, remove it
 func (r *AWSEndpointServiceReconciler) deleteObsoleteEndpointService(ctx context.Context, awsEndpointService *hyperv1.AWSEndpointService) (done bool, err error) {
 	endpointServices := &hyperv1.AWSEndpointServiceList{}
 	if err := r.List(ctx, endpointServices, client.InNamespace(awsEndpointService.Namespace)); err != nil {
@@ -476,6 +477,7 @@ func (r *AWSEndpointServiceReconciler) deleteObsoleteEndpointService(ctx context
 			hasPrivateIngressControllerEPService = true
 		}
 	}
+	// Only if both router and private ingress controller AWSEndpointServices exist, delete the obsolete one
 	if !hasPrivateRouterEPService || !hasPrivateIngressControllerEPService {
 		return false, nil
 	}
@@ -515,6 +517,7 @@ func (r *AWSEndpointServiceReconciler) ensureVpcEndpointService(ctx context.Cont
 			return "", "", err
 		}
 		if len(output.ServiceConfigurations) == 0 {
+			// clear the EndpointServiceName so a new Endpoint Service is created on the requeue
 			awsEndpointService.Status.EndpointServiceName = ""
 			return "", "", fmt.Errorf("endpoint service %s not found, resetting status", serviceName)
 		}
diff --git a/hypershift-operator/controllers/scheduler/aws/autoscaler.go b/hypershift-operator/controllers/scheduler/aws/autoscaler.go
index 90c04d95e0a..89ae151e63d 100644
--- a/hypershift-operator/controllers/scheduler/aws/autoscaler.go
+++ b/hypershift-operator/controllers/scheduler/aws/autoscaler.go
@@ -387,6 +387,8 @@ func machineSetsToScaleUp(pods []corev1.Pod, machineSets []machinev1beta1.Machin
 	}
 	requiredNodeCounts := determineRequiredNodes(pendingPods, pods, nodes)
 
+	// If a specific pair label is required, find the corresponding machinesets
+	// that are not already scaled up.
 	var placeHoldersNeeded []nodeRequirement
 	for _, r := range requiredNodeCounts {
 		if r.pairLabel != "" {
@@ -398,6 +400,7 @@ func machineSetsToScaleUp(pods []corev1.Pod, machineSets []machinev1beta1.Machin
 			result = append(result, machineSetsToScale...)
 			continue
 		}
+		// Otherwise, we need to find placeholders without a specific pair label
 		placeHoldersNeeded = append(placeHoldersNeeded, r)
 	}
 
@@ -415,6 +418,9 @@ func machineSetsToScaleUp(pods []corev1.Pod, machineSets []machinev1beta1.Machin
 	return result, pendingPods, requiredNodeCounts
 }
 
+// collectTakenPairLabels returns pair labels that cannot be used for new
+// placeholders because they are either (1) assigned to a hosted cluster
+// or (2) already have a placeholder scheduled.
 func collectTakenPairLabels(pods []corev1.Pod, nodes []corev1.Node) sets.Set[string] {
 	takenPairLabels := sets.New[string]()
 	for _, n := range nodes {
@@ -434,6 +440,12 @@ func scaleMachineSetsForRequirement(r nodeRequirement, machineSets []machinev1be
 	var result []machinev1beta1.MachineSet
 	needCount := r.count
 
+	// Find any available nodes of the specified size. These may not be ready
+	// yet but will allow scheduling soon.
+	// Available nodes must:
+	// 1 - have the request serving label
+	// 2 - have matching size label
+	// 3 - have a pair label that is not already taken
 	availableNodes := filterNodes(nodes, func(n *corev1.Node) bool {
 		return n.Labels[hyperv1.RequestServingComponentLabel] != "" &&
 			n.Labels[hyperv1.NodeSizeLabel] == r.sizeLabel &&
@@ -449,6 +461,14 @@ func scaleMachineSetsForRequirement(r nodeRequirement, machineSets []machinev1be
 		}
 	}
 
+	// Find machinesets that have already been scaled up but do not have
+	// any nodes yet.
+	// Pending machinesets must:
+	// 1 - have the request serving label
+	// 2 - have matching size label
+	// 3 - be scaled up without available replicas
+	// 4 - not correspond to any available nodes
+	// 5 - not have a pair label that is assigned to a cluster
 	pendingMachineSets := filterMachineSets(machineSets, func(ms *machinev1beta1.MachineSet) bool {
 		return isRequestServingMachineSet(ms) &&
 			machineSetSize(ms) == r.sizeLabel &&
@@ -463,6 +483,7 @@ func scaleMachineSetsForRequirement(r nodeRequirement, machineSets []machinev1be
 		return nil
 	}
 
+	// Scale up the paired machineset for any pending machinesets that need it
 	for _, ms := range pendingMachineSets {
 		if pairedMachineSet := matchingMachineSet(&ms, machineSets); pairedMachineSet != nil {
 			if ptr.Deref(pairedMachineSet.Spec.Replicas, 0) == 0 {
@@ -481,6 +502,12 @@ func scaleMachineSetsForRequirement(r nodeRequirement, machineSets []machinev1be
 }
 
 func pickAvailableMachineSetPairs(sizeLabel string, needCount int, machineSets []machinev1beta1.MachineSet, takenPairLabels sets.Set[string]) []machinev1beta1.MachineSet {
+	// Pick random pairs from available machinesets.
+	// Available machinesets must:
+	// 1 - have the request serving label
+	// 2 - have the corresponding size label
+	// 3 - not be scaled up
+	// 4 - have a pair label that is not already taken
 	availableMachineSets := filterMachineSets(machineSets, func(ms *machinev1beta1.MachineSet) bool {
 		return isRequestServingMachineSet(ms) &&
 			machineSetSize(ms) == sizeLabel &&
diff --git a/hypershift-operator/controllers/scheduler/aws/dedicated_request_serving_nodes.go b/hypershift-operator/controllers/scheduler/aws/dedicated_request_serving_nodes.go
index 69dbb720cae..750930ff5cc 100644
--- a/hypershift-operator/controllers/scheduler/aws/dedicated_request_serving_nodes.go
+++ b/hypershift-operator/controllers/scheduler/aws/dedicated_request_serving_nodes.go
@@ -174,6 +174,7 @@ func (r *DedicatedServingComponentScheduler) Reconcile(ctx context.Context, req
 		return ctrl.Result{}, fmt.Errorf("found too many dedicated nodes for HC: %v", len(dedicatedNodesForHC.Items))
 	}
 
+	// We check existing dedicated Nodes are 2. If not e.g. some was deleted, continue.
 	if scheduled := hcluster.Annotations[hyperv1.HostedClusterScheduledAnnotation]; scheduled == "true" && len(dedicatedNodesForHC.Items) == 2 {
 		log.Info("hosted cluster is already scheduled, nothing to do")
 		return ctrl.Result{}, nil
@@ -307,6 +308,7 @@ func (r *DedicatedServingComponentScheduler) updateHostedClusterAnnotations(ctx
 		}
 		if node.Labels[schedulerutil.LBSubnetsLabel] != "" && lbSubnets == "" {
 			lbSubnets = node.Labels[schedulerutil.LBSubnetsLabel]
+			// If subnets are separated by periods, replace them with commas
 			lbSubnets = strings.ReplaceAll(lbSubnets, ".", ",")
 		}
 		if node.Labels[OSDFleetManagerPairedNodesLabel] != "" && pairLabel == "" {
@@ -509,9 +511,11 @@ func (r *DedicatedServingComponentSchedulerAndSizer) Reconcile(ctx context.Conte
 
 	if len(nodesByZone) > 1 {
 		log.Info("sufficient nodes exist for placement")
+		// If we have enough nodes, update the hosted cluster.
 		if err := schedulerutil.UpdateHostedCluster(ctx, r.Client, hc, desiredSize, &config, goalNodes); err != nil {
 			return ctrl.Result{}, err
 		}
+		// Ensure we don't have a placeholder deployment, since we have nodes
 		log.Info("removing placeholder")
 		if err := r.deletePlaceholderDeployment(ctx, hc); err != nil {
 			return ctrl.Result{}, err
@@ -578,6 +582,10 @@ func (r *DedicatedServingComponentSchedulerAndSizer) backfillOrClaimNodes(ctx co
 				needClusterLabel = append(needClusterLabel, node)
 			}
 		}
+		// Find any nodes that are in the same fleet manager group and have the right size
+		// but are not labeled with the hosted cluster label. Ensure that these nodes are labeled
+		// and tainted with the hosted cluster label. This can happen if not all nodes were labeled/tainted
+		// when they were initially selected.
 		if len(needClusterLabel) > 0 {
 			log.Info("backfilling node labels")
 			for _, node := range needClusterLabel {
@@ -680,6 +688,7 @@ func (r *DedicatedServingComponentSchedulerAndSizer) deployAndLabelPlaceholderNo
 			return ctrl.Result{}, err
 		}
 	}
+	// Ensure that any placeholder deployment is deleted
 	log.Info("removing placeholder")
 	if err := r.deletePlaceholderDeployment(ctx, hc); err != nil {
 		return ctrl.Result{}, err
diff --git a/hypershift-operator/main.go b/hypershift-operator/main.go
index 7f54779aeba..29bbad491fb 100644
--- a/hypershift-operator/main.go
+++ b/hypershift-operator/main.go
@@ -432,6 +432,8 @@ func resolveOperatorImage(ctx context.Context, mgr ctrl.Manager, opts *StartOpti
 		if err := mgr.GetAPIReader().Get(ctx, crclient.ObjectKeyFromObject(me), me); err != nil {
 			return "", fmt.Errorf("failed to get operator pod %s: %w", crclient.ObjectKeyFromObject(me), err)
 		}
+		// Use the container status to make sure we get the sha256 reference rather than a potentially
+		// floating tag.
 		for _, container := range me.Status.ContainerStatuses {
 			if container.Name == "operator" {
 				return strings.TrimPrefix(container.ImageID, "docker-pullable://"), nil
@@ -446,6 +448,7 @@ func resolveOperatorImage(ctx context.Context, mgr ctrl.Manager, opts *StartOpti
 		if err != nil {
 			return false, err
 		}
+		// Apparently this is occasionally set to an empty string
 		if operatorImage == "" {
 			log.Info("operator image is empty, retrying")
 			return false, nil
@@ -538,6 +541,9 @@ func setupHostedClusterController(ctx context.Context, mgr ctrl.Manager, opts *S
 }
 
 func cleanupLegacyWebhook(ctx context.Context, mgr ctrl.Manager, opts *StartOptions) error {
+	// Since we dropped the validation webhook server we need to ensure this resource doesn't exist
+	// otherwise it will intercept kas requests and fail.
+	// TODO (alberto): dropped in 4.14.
 	if opts.EnableValidatingWebhook {
 		return nil
 	}
@@ -629,6 +635,7 @@ func setupPlatformControllers(mgr ctrl.Manager, opts *StartOptions, mgmtClusterC
 }
 
 func setupAzurePlatformController(mgr ctrl.Manager, log logr.Logger) error {
+	// ARO HCP uses Swift networking, not Private Link Services
 	if azureutil.IsAroHCP() {
 		return nil
 	}
@@ -732,6 +739,8 @@ func setupSupportControllers(mgr ctrl.Manager, opts *StartOptions, mgmtClusterCa
 		return fmt.Errorf("unable to create supported version controller: %w", err)
 	}
 
+	// If enabled, start controller to ensure UWM stack is enabled and configured
+	// to remotely write telemetry metrics.
 	if opts.EnableUWMTelemetryRemoteWrite {
 		if err := (&uwmtelemetry.Reconciler{
 			Namespace:              opts.Namespace,
diff --git a/ignition-server/controllers/local_ignitionprovider.go b/ignition-server/controllers/local_ignitionprovider.go
index d104689af3a..7a17217734d 100644
--- a/ignition-server/controllers/local_ignitionprovider.go
+++ b/ignition-server/controllers/local_ignitionprovider.go
@@ -234,6 +234,7 @@ func (p *LocalIgnitionProvider) resolveMCOImage(ctx context.Context, imageProvid
 		return "", err
 	}
 
+	// Making sure image uses the registry override for disconnected environments
 	mcoComposedImage := checkedMcoImage.String()
 	if mcoComposedImage != mcoImage {
 		mcoImage = mcoComposedImage
@@ -286,6 +287,7 @@ func (p *LocalIgnitionProvider) extractImageReferences(ctx context.Context, rele
 	if err != nil {
 		return "", fmt.Errorf("failed to look up release image metadata: %w", err)
 	}
+	// Replace the release image with the mirrored release image in disconnected environment cases.
 	if p.ReleaseProvider.GetMirroredReleaseImage() != "" {
 		releaseImage = p.ReleaseProvider.GetMirroredReleaseImage()
 		log.Info("using mirrored release image", "releaseImage", releaseImage)
@@ -298,6 +300,7 @@ func (p *LocalIgnitionProvider) extractImageReferences(ctx context.Context, rele
 	return releaseImage, nil
 }
 
+// For Azure and OpenStack, extract the cloud provider config file as MCO input
 func (p *LocalIgnitionProvider) writeCloudProviderConfig(ctx context.Context, mcoDir string) error {
 	if p.CloudProvider != hyperv1.AzurePlatform && p.CloudProvider != hyperv1.OpenStackPlatform {
 		return nil
@@ -344,6 +347,7 @@ func (p *LocalIgnitionProvider) extractClusterConfigBinary(ctx context.Context,
 		return err
 	}
 
+	// Making sure image uses the registry override for disconnected environments
 	ccaComposedImage := checkedClusterConfigImage.String()
 	if ccaComposedImage != clusterConfigImage {
 		clusterConfigImage = ccaComposedImage
@@ -402,10 +406,12 @@ func (p *LocalIgnitionProvider) runMCO(ctx context.Context, dirs *payloadDirs, r
 	if err != nil {
 		return fmt.Errorf("failed to serialize pull-secret.yaml: %w", err)
 	}
+	// NOTE: This overwrites the one in the machine-config-server configmap to ensure it's the one that matches the hash used in the token secret.
 	if err = os.WriteFile(fmt.Sprintf("%s/pull-secret.yaml", dirs.configDir), []byte(serializedPullSecret), 0644); err != nil {
 		return fmt.Errorf("failed to write pull secret to config dir: %w", err)
 	}
 
+	// args contains the base args that have not changed over time.
 	args := []string{
 		"bootstrap",
 		fmt.Sprintf("--root-ca=%s/root-ca.crt", dirs.configDir),
@@ -440,6 +446,7 @@ func (p *LocalIgnitionProvider) runMCO(ctx context.Context, dirs *payloadDirs, r
 		return fmt.Errorf("machine-config-operator process failed: %w", err)
 	}
 
+	// set missing images condition on the HCP
 	if err := p.reconcileValidReleaseInfoCondition(ctx, imageProvider); err != nil {
 		log.Error(err, "failed to reconcile IgnitionValidReleaseInfo condition")
 	}
@@ -467,6 +474,8 @@ func (p *LocalIgnitionProvider) copyMCOOutputToMCC(destDir, mccDir, configDir st
 		}
 	}
 
+	// Copy machineconfigpool config data to the MCC input directory. This is
+	// important to override the pools with the ones generated by the CPO.
 	matches, err := filepath.Glob(filepath.Join(configDir, "*.machineconfigpool.yaml"))
 	if err != nil {
 		return fmt.Errorf("failed to list dir %s: %w", configDir, err)
@@ -484,10 +493,12 @@ func (p *LocalIgnitionProvider) runMCC(ctx context.Context, dirs *payloadDirs, i
 	log := ctrl.Log.WithName("get-payload")
 	start := time.Now()
 
+	// copy the image config out of the configDir and into the mccBaseDir
 	if err := copyFile(filepath.Join(dirs.configDir, "image-config.yaml"), filepath.Join(dirs.mccDir, "image-config.yaml")); err != nil {
 		return fmt.Errorf("failed to copy image-config.yaml: %w", err)
 	}
 
+	// args contains the base args that have not changed over time.
 	args := []string{
 		"bootstrap",
 		fmt.Sprintf("--manifest-dir=%s", dirs.mccDir),
@@ -496,6 +507,7 @@ func (p *LocalIgnitionProvider) runMCC(ctx context.Context, dirs *payloadDirs, i
 		fmt.Sprintf("--dest-dir=%s", dirs.mcsDir),
 	}
 
+	// For 4.14 onwards there's a requirement to include the payload version flag.
 	if payloadVersion.GTE(semver.Version{Major: 4, Minor: 14}) {
 		args = append(args,
 			fmt.Sprintf("--payload-version=%s", imageProvider.Version()),
@@ -515,6 +527,7 @@ func (p *LocalIgnitionProvider) runMCSAndFetchPayload(ctx context.Context, dirs
 	log := ctrl.Log.WithName("get-payload")
 	start := time.Now()
 
+	// The certificate is cached across calls to avoid redundant RSA key generation on every payload request.
 	certPEM, keyPEM, err := p.getOrGenerateMCSCert()
 	if err != nil {
 		return nil, fmt.Errorf("failed to generate certificates: %w", err)
@@ -536,12 +549,14 @@ func (p *LocalIgnitionProvider) runMCSAndFetchPayload(ctx context.Context, dirs
 		"--insecure-port=22626",
 	}
 
+	// For 4.14 onwards there's a requirement to include the payload version flag.
 	if payloadVersion.GTE(semver.Version{Major: 4, Minor: 14}) {
 		args = append(args,
 			fmt.Sprintf("--payload-version=%s", imageProvider.Version()),
 		)
 	}
 
+	// Spin up the MCS process and ensure it's signaled to terminate when the function returns
 	mcsCtx, cancel := context.WithCancel(ctx)
 	defer cancel()
 	cmd := exec.CommandContext(mcsCtx, filepath.Join(dirs.binDir, "machine-config-server"), args...)
@@ -554,6 +569,7 @@ func (p *LocalIgnitionProvider) runMCSAndFetchPayload(ctx context.Context, dirs
 		Timeout: 5 * time.Second,
 	}
 	var payload []byte
+	// Try connecting to the server until we get a response or the context is closed
 	err = wait.PollUntilContextCancel(ctx, 1*time.Second, true, func(ctx context.Context) (bool, error) {
 		req, err := http.NewRequestWithContext(ctx, "GET", "http://localhost:22626/config/master", nil)
 		if err != nil {
@@ -692,6 +708,7 @@ func (p *LocalIgnitionProvider) GetPayload(ctx context.Context, releaseImage, cu
 		return nil, fmt.Errorf("failed to parse payload version: %w", err)
 	}
 
+	// set the component to the correct binary name and file path based on the payload version
 	clusterConfigComponent := "cluster-config-api"
 	clusterConfigComponentShort := "cca"
 	clusterConfigFile := "usr/bin/render"
@@ -719,14 +736,17 @@ func (p *LocalIgnitionProvider) GetPayload(ctx context.Context, releaseImage, cu
 		return nil, fmt.Errorf("failed to execute %s: %w", clusterConfigComponent, err)
 	}
 
+	// First, run the MCO using templates and image refs as input. This generates output for the MCC.
 	if err := p.runMCO(ctx, dirs, releaseImage, pullSecret, mcsConfig, imageProvider, payloadVersion); err != nil {
 		return nil, fmt.Errorf("failed to execute machine-config-operator: %w", err)
 	}
 
+	// Next, run the MCC using templates and MCO output as input, producing output for the MCS.
 	if err := p.runMCC(ctx, dirs, imageProvider, payloadVersion); err != nil {
 		return nil, fmt.Errorf("failed to execute machine-config-controller: %w", err)
 	}
 
+	// Finally, run the MCS to generate a payload.
 	payload, err := p.runMCSAndFetchPayload(ctx, dirs, imageProvider, payloadVersion)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get payload from mcs: %w", err)