Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion cmd/cluster-olm-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
cache "k8s.io/client-go/tools/cache"
"k8s.io/component-base/cli"
utilflag "k8s.io/component-base/cli/flag"
"k8s.io/klog/v2"
Expand Down Expand Up @@ -234,6 +235,11 @@ func runOperator(ctx context.Context, cc *controllercmd.ControllerContext) error
return fmt.Errorf("unable to retrieve featureSet: %w", err)
}

infra, err := cl.ConfigClient.ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
if err != nil {
return fmt.Errorf("unable to retrieve infrastructure: %w", err)
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

clusterCatalogGvk := ocv1.GroupVersion.WithKind("ClusterCatalog")
cb := controller.Builder{
Assets: assetPath,
Expand All @@ -246,7 +252,8 @@ func runOperator(ctx context.Context, cc *controllercmd.ControllerContext) error
Scope: meta.RESTScopeRoot,
},
},
FeatureGate: *fg,
FeatureGate: *fg,
Infrastructure: infra,
}

staticResourceControllers, deploymentControllers, clusterCatalogControllers, relatedObjects, err := cb.BuildControllers("catalogd", "operator-controller")
Expand Down Expand Up @@ -366,6 +373,29 @@ func runOperator(ctx context.Context, cc *controllercmd.ControllerContext) error

operatorLoggingController := loglevel.NewClusterOperatorLoggingController(cl.OperatorClient, cc.EventRecorder.ForComponent("ClusterOLMOperatorLoggingController"))

// Watch for infrastructure topology changes. Topology changes are exceedingly rare
// (e.g., SNO to HA conversion) but require re-rendering the Helm manifests with the
// correct replica count and PDB settings. The manifests are rendered on startup, so the
// process exits and relies on being restarted (the kubelet restarts the container of the
// Deployment-managed pod) to re-render them for the new topology.
initialTopology := infra.Status.ControlPlaneTopology
checkTopologyChange := func(obj interface{}) {
newInfra, ok := obj.(*configv1.Infrastructure)
if !ok {
return
}
if newInfra.Status.ControlPlaneTopology != initialTopology {
log.Info("Infrastructure topology changed, restarting to re-render manifests",
"old", initialTopology, "new", newInfra.Status.ControlPlaneTopology)
os.Exit(0)
}
}
if _, err := cl.InfrastructureClient.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: checkTopologyChange,
UpdateFunc: func(_, newObj interface{}) { checkTopologyChange(newObj) },
}); err != nil {
return fmt.Errorf("failed to add infrastructure event handler: %w", err)
}

cl.StartInformers(ctx)

select {
Expand Down
24 changes: 24 additions & 0 deletions pkg/clients/clients.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type Clients struct {
ClusterObjectSetClient *ClusterObjectSetClient
ClusterCatalogClient *ClusterCatalogClient
ProxyClient *ProxyClient
InfrastructureClient *InfrastructureClient
ConfigClient configclient.Interface
KubeInformerFactory informers.SharedInformerFactory
ConfigInformerFactory configinformer.SharedInformerFactory
Expand Down Expand Up @@ -128,6 +129,7 @@ func New(cc *controllercmd.ControllerContext) (*Clients, error) {
ClusterCatalogClient: NewClusterCatalogClient(dynClient),
ClusterObjectSetClient: NewClusterObjectSetClient(dynClient),
ProxyClient: NewProxyClient(configInformerFactory),
InfrastructureClient: NewInfrastructureClient(configInformerFactory),
ConfigClient: configClient,
KubeInformerFactory: informers.NewSharedInformerFactory(kubeClient, defaultResyncPeriod),
ConfigInformerFactory: configInformerFactory,
Expand Down Expand Up @@ -290,6 +292,28 @@ func NewProxyClient(infFact configinformer.SharedInformerFactory) *ProxyClient {
}
}

// InfrastructureClientInterface is the read-only view of Infrastructure
// objects: a single lookup by key.
type InfrastructureClientInterface interface {
// Get returns the Infrastructure identified by key, or an error.
// NOTE(review): key is presumably the object name (e.g. "cluster") — confirm against callers.
Get(key string) (*configv1.Infrastructure, error)
}

// InfrastructureClient wraps an InfrastructureInformer and exposes both its
// shared informer (for registering event handlers) and a lister-backed Get.
type InfrastructureClient struct {
// informer is the source of both the shared informer and the lister.
informer configinformerv1.InfrastructureInformer
}

// Informer exposes the shared index informer behind this client so that
// callers can attach event handlers to it.
func (ic *InfrastructureClient) Informer() cache.SharedIndexInformer {
	shared := ic.informer.Informer()
	return shared
}

// Get looks up the Infrastructure identified by key through the informer's
// lister and returns whatever the lister reports, including its error.
func (ic *InfrastructureClient) Get(key string) (*configv1.Infrastructure, error) {
	lister := ic.informer.Lister()
	return lister.Get(key)
}

// NewInfrastructureClient builds an InfrastructureClient backed by the
// config/v1 Infrastructures informer obtained from infFact.
func NewInfrastructureClient(infFact configinformer.SharedInformerFactory) *InfrastructureClient {
	infraInformer := infFact.Config().V1().Infrastructures()
	return &InfrastructureClient{informer: infraInformer}
}

type OperatorClient struct {
clientset operatorclient.Interface
informers operatorinformers.SharedInformerFactory
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type Builder struct {
ControllerContext *controllercmd.ControllerContext
KnownRESTMappings map[schema.GroupVersionKind]*meta.RESTMapping
FeatureGate configv1.FeatureGate
Infrastructure *configv1.Infrastructure
}

func (b *Builder) BuildControllers(subDirectories ...string) (map[string]factory.Controller, map[string]factory.Controller, map[string]factory.Controller, []configv1.ObjectReference, error) {
Expand Down
39 changes: 39 additions & 0 deletions pkg/controller/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"regexp"
"strings"

configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/cluster-olm-operator/pkg/helmvalues"

yaml3 "gopkg.in/yaml.v3"
Expand Down Expand Up @@ -80,6 +81,34 @@ func (b *Builder) renderHelmTemplate(helmPath, manifestDir string) error {
return fmt.Errorf("error setting OPERATOR_CONTROLLER_IMAGE: %w", err)
}

// On highly available control-plane topologies (HighlyAvailable, HighlyAvailableArbiter
// and DualReplica) scale to 2 replicas and enable the PDB so that rolling updates never
// leave zero running pods. On any other topology (e.g. SingleReplica (SNO) or External)
// the manifest defaults (replicas=1, PDB disabled) are kept as-is.
if b.Infrastructure != nil && isHighlyAvailableTopology(b.Infrastructure) {
log.Info("HighlyAvailable topology detected, setting replicas=2 and enabling PDB")
haOverrides := []struct {
key string
value interface{}
}{
{"options.catalogd.deployment.replicas", 2},
{"options.operatorController.deployment.replicas", 2},
{"options.catalogd.podDisruptionBudget.enabled", true},
{"options.operatorController.podDisruptionBudget.enabled", true},
}
for _, o := range haOverrides {
var err error
switch v := o.value.(type) {
case int:
err = values.SetIntValue(o.key, v)
case bool:
err = values.SetBoolValue(o.key, v)
}
if err != nil {
return fmt.Errorf("error setting %s: %w", o.key, err)
}
}
}

log.Info("Calculated values", "values", values.GetValues())

// Load the helm chart
Expand Down Expand Up @@ -265,6 +294,16 @@ func sanitizeFilename(name string) string {
return reg.ReplaceAllString(name, "-")
}

// isHighlyAvailableTopology reports whether the control-plane topology
// recorded in infra's status is HighlyAvailable, HighlyAvailableArbiter or
// DualReplica. infra is dereferenced unconditionally, so it must be non-nil.
func isHighlyAvailableTopology(infra *configv1.Infrastructure) bool {
	topology := infra.Status.ControlPlaneTopology
	return topology == configv1.HighlyAvailableTopologyMode ||
		topology == configv1.HighlyAvailableArbiterMode ||
		topology == configv1.DualReplicaTopologyMode
}

func writeDocument(filePath, content string) error {
file, err := os.Create(filePath)
if err != nil {
Expand Down
16 changes: 16 additions & 0 deletions pkg/helmvalues/helmvalues.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ func (v *HelmValues) SetStringValue(location string, newValue string) error {
return unstructured.SetNestedField(v.values, newValue, ss...)
}

// SetIntValue writes newValue at the dot-separated path given by location
// (for example "a.b.c") inside the values tree. An empty location is
// rejected with an error; otherwise the result of SetNestedField is returned.
func (v *HelmValues) SetIntValue(location string, newValue int) error {
	if len(location) == 0 {
		return errors.New("location string has no locations")
	}
	path := strings.Split(location, ".")
	// The value is stored as int64; presumably the unstructured helpers only
	// accept JSON-compatible number types — confirm.
	stored := int64(newValue)
	return unstructured.SetNestedField(v.values, stored, path...)
}

// SetBoolValue writes newValue at the dot-separated path given by location
// (for example "a.b.c") inside the values tree. An empty location is
// rejected with an error; otherwise the result of SetNestedField is returned.
func (v *HelmValues) SetBoolValue(location string, newValue bool) error {
	if len(location) == 0 {
		return errors.New("location string has no locations")
	}
	return unstructured.SetNestedField(v.values, newValue, strings.Split(location, ".")...)
}

func (v *HelmValues) AddListValue(location string, newValue string) error {
if location == "" {
return errors.New("location string has no locations")
Expand Down