diff --git a/.github/workflows/build-and-push.yaml b/.github/workflows/build-and-push.yaml index 7d30fdf8..29cb575d 100644 --- a/.github/workflows/build-and-push.yaml +++ b/.github/workflows/build-and-push.yaml @@ -4,6 +4,7 @@ on: push: branches: - main + - lcore-migration workflow_dispatch: {} env: @@ -32,7 +33,13 @@ jobs: BUNDLE_IMAGE=${IMAGE_TAG_BASE}-bundle:v${VERSION} CATALOG_IMAGE=${IMAGE_TAG_BASE}-catalog:v${VERSION} - LATEST_TAG=latest + # `main` branch -> `latest` tag + # any other branch -> `[branch-name]-latest` tag + if [[ "${{ github.ref_name }}" == "main" ]]; then + LATEST_TAG=latest + else + LATEST_TAG=${{ github.ref_name }}-latest + fi OPERATOR_IMAGE_LATEST=${IMAGE_TAG_BASE}:${LATEST_TAG} BUNDLE_IMAGE_LATEST=${IMAGE_TAG_BASE}-bundle:${LATEST_TAG} CATALOG_IMAGE_LATEST=${IMAGE_TAG_BASE}-catalog:${LATEST_TAG} diff --git a/Makefile b/Makefile index 05546809..763dc7d0 100644 --- a/Makefile +++ b/Makefile @@ -62,8 +62,6 @@ endif OPERATOR_SDK_VERSION ?= v1.38.0-ocp # Image URL to use all building/pushing image targets IMG ?= $(IMAGE_TAG_BASE):latest -# OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION defines the version injected into the operator (OLS operator version) -OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION ?= latest # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. 
ENVTEST_K8S_VERSION = 1.30.0 @@ -203,17 +201,6 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - -.PHONY: ols-deploy -ols-deploy: export OUTPUT_DIR = out -ols-deploy: ## Deploy OpenShift Lightspeed Operator - bash scripts/gen-ols.sh - oc apply -f $(OUTPUT_DIR)/ols - -.PHONY: ols-undeploy -ols-undeploy: export OUTPUT_DIR = out -ols-undeploy: ## Deploy OpenShift Lightspeed Operator - find $(OUTPUT_DIR)/ols -name "*.yaml" -printf " -f %p" | xargs oc delete --ignore-not-found=$(ignore-not-found) - .PHONY: undeploy undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - @@ -329,7 +316,6 @@ endif bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files. $(OPERATOR_SDK) generate kustomize manifests -q cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG) - cd config/manager && $(KUSTOMIZE) edit add patch --kind Deployment --name controller-manager --patch "[{\"op\": \"replace\", \"path\": \"/spec/template/spec/containers/0/env/0/value\", \"value\": \"$(OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION)\"}]" $(KUSTOMIZE) build config/manifests | $(OPERATOR_SDK) generate bundle $(BUNDLE_GEN_FLAGS) $(OPERATOR_SDK) bundle validate ./bundle diff --git a/README.md b/README.md index 788d2f44..5a516d6f 100644 --- a/README.md +++ b/README.md @@ -238,9 +238,5 @@ make kuttl-test-run ``` **Important Notes:** -- The tests use the `openshift-lightspeed` namespace to test in the exact namespace -where the OLS operator is expected to operate. -- The correct behavior of the OLS operator is not guaranteed outside of the -`openshift-lightspeed` namespace. 
- Ensure the namespace is clean before running tests to avoid resource conflicts or test failures. diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go index cd96fe95..3c990416 100644 --- a/api/v1beta1/conditions.go +++ b/api/v1beta1/conditions.go @@ -63,4 +63,10 @@ const ( // OCPRAGOverrideInvalidMessage OCPRAGOverrideInvalidMessage = "Invalid OCP RAG version override" + + // DeploymentCheckFailedMessage + DeploymentCheckFailedMessage = "Failed to check deployment status: %s" + + // DeploymentsNotReadyMessage + DeploymentsNotReadyMessage = "Waiting for deployments to be ready: %s" ) diff --git a/api/v1beta1/openstacklightspeed_types.go b/api/v1beta1/openstacklightspeed_types.go index 17237357..83ae9921 100644 --- a/api/v1beta1/openstacklightspeed_types.go +++ b/api/v1beta1/openstacklightspeed_types.go @@ -23,11 +23,20 @@ import ( ) const ( - // Container image fall-back defaults - // OpenStackLightspeedContainerImage is the fall-back container image for OpenStackLightspeed OpenStackLightspeedContainerImage = "quay.io/openstack-lightspeed/rag-content:os-docs-2025.2" - MaxTokensForResponseDefault = 2048 + + // LCoreContainerImage is the fall-back container image for LCore + LCoreContainerImage = "quay.io/lightspeed-core/lightspeed-stack:latest" + + // ExporterContainerImage is the fall-back container image for the Dataverse Exporter + ExporterContainerImage = "quay.io/lightspeed-core/lightspeed-to-dataverse-exporter:latest" + + // PostgresContainerImage is the fall-back container image for PostgreSQL + PostgresContainerImage = "registry.redhat.io/rhel9/postgresql-16:latest" + + // MaxTokensForResponseDefault is the default maximum number of tokens that should be used for response + MaxTokensForResponseDefault = 2048 ) // OpenStackLightspeedSpec defines the desired state of OpenStackLightspeed @@ -82,16 +91,6 @@ type OpenStackLightspeedCore struct { // MaxTokensForResponse defines the maximum number of tokens to be used for the response generation 
MaxTokensForResponse int `json:"maxTokensForResponse,omitempty"` - // +kubebuilder:validation:Optional - // +kubebuilder:default="openshift-marketplace" - // Namespace where the CatalogSource containing the OLS operator is located - CatalogSourceNamespace string `json:"catalogSourceNamespace"` - - // +kubebuilder:validation:Optional - // +kubebuilder:default="redhat-operators" - // Name of the CatalogSource that contains the OLS Operator - CatalogSourceName string `json:"catalogSourceName"` - // +kubebuilder:validation:Optional // Project ID for LLM providers that require it (e.g., WatsonX) LLMProjectID string `json:"llmProjectID,omitempty"` @@ -131,10 +130,26 @@ type OpenStackLightspeedStatus struct { // +kubebuilder:subresource:status // +kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.conditions[0].status",description="Status" // +kubebuilder:printcolumn:name="Message",type="string",JSONPath=".status.conditions[0].message",description="Message" -// +operator-sdk:csv:customresourcedefinitions:resources={{OLSConfig,v1alpha1,cluster}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Deployment,v1,lightspeed-stack-deployment}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Deployment,v1,lightspeed-postgres-server}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Service,v1,lightspeed-app-server}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Service,v1,lightspeed-postgres-server}} +// +operator-sdk:csv:customresourcedefinitions:resources={{ConfigMap,v1,llama-stack-config}} +// +operator-sdk:csv:customresourcedefinitions:resources={{ConfigMap,v1,lightspeed-stack-config}} +// +operator-sdk:csv:customresourcedefinitions:resources={{ConfigMap,v1,lightspeed-postgres-conf}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Secret,v1,lightspeed-postgres-secret}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Secret,v1,lightspeed-postgres-bootstrap}} +// 
+operator-sdk:csv:customresourcedefinitions:resources={{Secret,v1,metrics-reader-token}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Secret,v1,lightspeed-tls}} +// +operator-sdk:csv:customresourcedefinitions:resources={{Secret,v1,lightspeed-postgres-certs}} +// +operator-sdk:csv:customresourcedefinitions:resources={{ServiceAccount,v1,lightspeed-app-server}} +// +operator-sdk:csv:customresourcedefinitions:resources={{NetworkPolicy,v1,lightspeed-app-server}} +// +operator-sdk:csv:customresourcedefinitions:resources={{NetworkPolicy,v1,lightspeed-postgres-server}} +// +operator-sdk:csv:customresourcedefinitions:resources={{ClusterRole,v1,lightspeed-app-server-sar-role}} +// +operator-sdk:csv:customresourcedefinitions:resources={{ClusterRoleBinding,v1,lightspeed-app-server-sar-role-binding}} // +operator-sdk:csv:customresourcedefinitions:resources={{Subscription,v1alpha1}} // +operator-sdk:csv:customresourcedefinitions:resources={{ClusterServiceVersion,v1alpha1}} -// +operator-sdk:csv:customresourcedefinitions:resources={{InstallPlan,v1alpha}} +// +operator-sdk:csv:customresourcedefinitions:resources={{InstallPlan,v1alpha1}} // OpenStackLightspeed is the Schema for the openstacklightspeeds API type OpenStackLightspeed struct { @@ -165,6 +180,9 @@ func (instance OpenStackLightspeed) IsReady() bool { type OpenStackLightspeedDefaults struct { RAGImageURL string + LCoreImageURL string + ExporterImageURL string + PostgresImageURL string MaxTokensForResponse int } @@ -176,6 +194,12 @@ func SetupDefaults() { openStackLightspeedDefaults := OpenStackLightspeedDefaults{ RAGImageURL: util.GetEnvVar( "RELATED_IMAGE_OPENSTACK_LIGHTSPEED_IMAGE_URL_DEFAULT", OpenStackLightspeedContainerImage), + LCoreImageURL: util.GetEnvVar( + "RELATED_IMAGE_LCORE_IMAGE_URL_DEFAULT", LCoreContainerImage), + ExporterImageURL: util.GetEnvVar( + "RELATED_IMAGE_EXPORTER_IMAGE_URL_DEFAULT", ExporterContainerImage), + PostgresImageURL: util.GetEnvVar( + 
"RELATED_IMAGE_POSTGRES_IMAGE_URL_DEFAULT", PostgresContainerImage), MaxTokensForResponse: MaxTokensForResponseDefault, } diff --git a/bundle/manifests/lightspeed.openstack.org_openstacklightspeeds.yaml b/bundle/manifests/lightspeed.openstack.org_openstacklightspeeds.yaml index 8bac440e..a9fa4a71 100644 --- a/bundle/manifests/lightspeed.openstack.org_openstacklightspeeds.yaml +++ b/bundle/manifests/lightspeed.openstack.org_openstacklightspeeds.yaml @@ -49,15 +49,6 @@ spec: spec: description: OpenStackLightspeedSpec defines the desired state of OpenStackLightspeed properties: - catalogSourceName: - default: redhat-operators - description: Name of the CatalogSource that contains the OLS Operator - type: string - catalogSourceNamespace: - default: openshift-marketplace - description: Namespace where the CatalogSource containing the OLS - operator is located - type: string enableOCPRAG: default: false description: Enables automatic OCP documentation based on cluster diff --git a/bundle/manifests/openstack-lightspeed-operator-manager-rolebinding_rbac.authorization.k8s.io_v1_rolebinding.yaml b/bundle/manifests/openstack-lightspeed-operator-manager-rolebinding_rbac.authorization.k8s.io_v1_rolebinding.yaml new file mode 100644 index 00000000..341f0e2d --- /dev/null +++ b/bundle/manifests/openstack-lightspeed-operator-manager-rolebinding_rbac.authorization.k8s.io_v1_rolebinding.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: openstack-lightspeed-operator + name: openstack-lightspeed-operator-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: manager-role +subjects: +- kind: ServiceAccount + name: openstack-lightspeed-operator-controller-manager + namespace: openstack-lightspeed-operator-system diff --git a/bundle/manifests/openstack-lightspeed-operator.clusterserviceversion.yaml 
b/bundle/manifests/openstack-lightspeed-operator.clusterserviceversion.yaml index 9b2b7299..f204b7bb 100644 --- a/bundle/manifests/openstack-lightspeed-operator.clusterserviceversion.yaml +++ b/bundle/manifests/openstack-lightspeed-operator.clusterserviceversion.yaml @@ -25,7 +25,7 @@ metadata: ] capabilities: Basic Install categories: AI/Machine Learning - createdAt: "2026-02-19T13:45:56Z" + createdAt: "2026-04-09T11:08:52Z" description: AI-powered virtual assistant for Red Hat OpenStack Services on OpenShift features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" @@ -37,12 +37,12 @@ metadata: features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" - operatorframework.io/suggested-namespace: openshift-lightspeed + operatorframework.io/suggested-namespace: openstack-lightspeed operators.operatorframework.io/builder: operator-sdk-v1.38.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 repository: https://github.com/openstack-lightspeed/operator name: openstack-lightspeed-operator.v0.0.1 - namespace: openshift-lightspeed + namespace: openstack-lightspeed spec: apiservicedefinitions: {} customresourcedefinitions: @@ -58,13 +58,61 @@ spec: version: v1alpha1 - kind: InstallPlan name: "" - version: v1alpha + version: v1alpha1 - kind: Subscription name: "" version: v1alpha1 - - kind: OLSConfig - name: cluster - version: v1alpha1 + - kind: NetworkPolicy + name: lightspeed-app-server + version: v1 + - kind: Service + name: lightspeed-app-server + version: v1 + - kind: ServiceAccount + name: lightspeed-app-server + version: v1 + - kind: ClusterRole + name: lightspeed-app-server-sar-role + version: v1 + - kind: ClusterRoleBinding + name: lightspeed-app-server-sar-role-binding + version: v1 + - kind: Secret + name: lightspeed-postgres-bootstrap + version: v1 + - kind: Secret + name: lightspeed-postgres-certs + 
version: v1 + - kind: ConfigMap + name: lightspeed-postgres-conf + version: v1 + - kind: Secret + name: lightspeed-postgres-secret + version: v1 + - kind: Deployment + name: lightspeed-postgres-server + version: v1 + - kind: NetworkPolicy + name: lightspeed-postgres-server + version: v1 + - kind: Service + name: lightspeed-postgres-server + version: v1 + - kind: ConfigMap + name: lightspeed-stack-config + version: v1 + - kind: Deployment + name: lightspeed-stack-deployment + version: v1 + - kind: Secret + name: lightspeed-tls + version: v1 + - kind: ConfigMap + name: llama-stack-config + version: v1 + - kind: Secret + name: metrics-reader-token + version: v1 specDescriptors: - description: |- Secret name containing API token for the LLMEndpoint. The secret must contain @@ -133,38 +181,26 @@ spec: - patch - update - apiGroups: - - ols.openshift.io + - operators.coreos.com resources: - - olsconfigs + - clusterserviceversions verbs: - - create - - delete - get - list - - patch - - update - watch - apiGroups: - - ols.openshift.io - resources: - - olsconfigs/finalizers - verbs: - - update - - apiGroups: - - ols.openshift.io + - rbac.authorization.k8s.io resources: - - olsconfigs/status + - clusterrolebindings + - clusterroles verbs: + - create + - delete + - deletecollection - get + - list - patch - update - - apiGroups: - - operators.coreos.com - resources: - - clusterserviceversions - verbs: - - get - - list - watch - apiGroups: - authentication.k8s.io @@ -206,14 +242,18 @@ spec: command: - /manager env: - - name: OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION - value: latest - name: WATCH_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.annotations['olm.targetNamespaces'] - name: RELATED_IMAGE_OPENSTACK_LIGHTSPEED_IMAGE_URL_DEFAULT value: quay.io/openstack-lightspeed/rag-content:os-docs-2025.2 + - name: RELATED_IMAGE_LCORE_IMAGE_URL_DEFAULT + value: quay.io/lightspeed-core/lightspeed-stack:latest + - name: RELATED_IMAGE_EXPORTER_IMAGE_URL_DEFAULT + value: 
quay.io/lightspeed-core/lightspeed-to-dataverse-exporter:latest + - name: RELATED_IMAGE_POSTGRES_IMAGE_URL_DEFAULT + value: registry.redhat.io/rhel9/postgresql-16:latest image: quay.io/openstack-lightspeed/operator:latest livenessProbe: httpGet: @@ -278,35 +318,81 @@ spec: - create - patch - apiGroups: - - operators.coreos.com + - "" resources: - - clusterserviceversions + - configmaps verbs: + - create - delete + - get + - list - patch - update + - watch - apiGroups: - - operators.coreos.com + - "" resources: - - installplans + - secrets verbs: + - create - delete + - deletecollection - get - list + - patch - update - watch - apiGroups: - - operators.coreos.com + - "" resources: - - subscriptions + - serviceaccounts + verbs: + - create + - get + - list + - patch + - watch + - apiGroups: + - "" + resources: + - services + verbs: + - create + - get + - list + - patch + - update + - watch + - apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - patch + - update + - watch + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies verbs: - create - - delete - get - list - patch - update - watch + - apiGroups: + - operators.coreos.com + resources: + - clusterserviceversions + verbs: + - delete + - patch + - update serviceAccountName: openstack-lightspeed-operator-controller-manager strategy: deployment installModes: @@ -341,4 +427,10 @@ spec: relatedImages: - image: quay.io/openstack-lightspeed/rag-content:os-docs-2025.2 name: openstack-lightspeed-image-url-default + - image: quay.io/lightspeed-core/lightspeed-stack:latest + name: lcore-image-url-default + - image: quay.io/lightspeed-core/lightspeed-to-dataverse-exporter:latest + name: exporter-image-url-default + - image: registry.redhat.io/rhel9/postgresql-16:latest + name: postgres-image-url-default version: 0.0.1 diff --git a/config/crd/bases/lightspeed.openstack.org_openstacklightspeeds.yaml b/config/crd/bases/lightspeed.openstack.org_openstacklightspeeds.yaml index 
9b19ba74..5f5b9071 100644 --- a/config/crd/bases/lightspeed.openstack.org_openstacklightspeeds.yaml +++ b/config/crd/bases/lightspeed.openstack.org_openstacklightspeeds.yaml @@ -49,15 +49,6 @@ spec: spec: description: OpenStackLightspeedSpec defines the desired state of OpenStackLightspeed properties: - catalogSourceName: - default: redhat-operators - description: Name of the CatalogSource that contains the OLS Operator - type: string - catalogSourceNamespace: - default: openshift-marketplace - description: Namespace where the CatalogSource containing the OLS - operator is located - type: string enableOCPRAG: default: false description: Enables automatic OCP documentation based on cluster diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 788d464a..3e4fae29 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -66,14 +66,18 @@ spec: image: controller:latest name: manager env: - - name: "OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION" - value: "latest" - name: WATCH_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - name: RELATED_IMAGE_OPENSTACK_LIGHTSPEED_IMAGE_URL_DEFAULT value: quay.io/openstack-lightspeed/rag-content:os-docs-2025.2 + - name: RELATED_IMAGE_LCORE_IMAGE_URL_DEFAULT + value: quay.io/lightspeed-core/lightspeed-stack:latest + - name: RELATED_IMAGE_EXPORTER_IMAGE_URL_DEFAULT + value: quay.io/lightspeed-core/lightspeed-to-dataverse-exporter:latest + - name: RELATED_IMAGE_POSTGRES_IMAGE_URL_DEFAULT + value: registry.redhat.io/rhel9/postgresql-16:latest securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/config/manifests/bases/openstack-lightspeed-operator.clusterserviceversion.yaml b/config/manifests/bases/openstack-lightspeed-operator.clusterserviceversion.yaml index b447d209..a14cccfe 100644 --- a/config/manifests/bases/openstack-lightspeed-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/openstack-lightspeed-operator.clusterserviceversion.yaml @@ -16,10 +16,10 
@@ metadata: features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" - operatorframework.io/suggested-namespace: openshift-lightspeed + operatorframework.io/suggested-namespace: openstack-lightspeed repository: https://github.com/openstack-lightspeed/operator name: openstack-lightspeed-operator.v0.0.0 - namespace: openshift-lightspeed + namespace: openstack-lightspeed spec: apiservicedefinitions: {} customresourcedefinitions: @@ -35,13 +35,61 @@ spec: version: v1alpha1 - kind: InstallPlan name: "" - version: v1alpha + version: v1alpha1 - kind: Subscription name: "" version: v1alpha1 - - kind: OLSConfig - name: cluster - version: v1alpha1 + - kind: NetworkPolicy + name: lightspeed-app-server + version: v1 + - kind: Service + name: lightspeed-app-server + version: v1 + - kind: ServiceAccount + name: lightspeed-app-server + version: v1 + - kind: ClusterRole + name: lightspeed-app-server-sar-role + version: v1 + - kind: ClusterRoleBinding + name: lightspeed-app-server-sar-role-binding + version: v1 + - kind: Secret + name: lightspeed-postgres-bootstrap + version: v1 + - kind: Secret + name: lightspeed-postgres-certs + version: v1 + - kind: ConfigMap + name: lightspeed-postgres-conf + version: v1 + - kind: Secret + name: lightspeed-postgres-secret + version: v1 + - kind: Deployment + name: lightspeed-postgres-server + version: v1 + - kind: NetworkPolicy + name: lightspeed-postgres-server + version: v1 + - kind: Service + name: lightspeed-postgres-server + version: v1 + - kind: ConfigMap + name: lightspeed-stack-config + version: v1 + - kind: Deployment + name: lightspeed-stack-deployment + version: v1 + - kind: Secret + name: lightspeed-tls + version: v1 + - kind: ConfigMap + name: llama-stack-config + version: v1 + - kind: Secret + name: metrics-reader-token + version: v1 specDescriptors: - description: |- Secret name containing API token for the 
LLMEndpoint. The secret must contain diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 0c785742..81d3ffb1 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -39,73 +39,107 @@ rules: - patch - update - apiGroups: - - ols.openshift.io + - operators.coreos.com + resources: + - clusterserviceversions + verbs: + - get + - list + - watch +- apiGroups: + - rbac.authorization.k8s.io resources: - - olsconfigs + - clusterrolebindings + - clusterroles verbs: - create - delete + - deletecollection - get - list - patch - update - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: manager-role + namespace: openstack-lightspeed +rules: - apiGroups: - - ols.openshift.io + - "" resources: - - olsconfigs/finalizers + - configmaps verbs: + - create + - delete + - get + - list + - patch - update + - watch - apiGroups: - - ols.openshift.io + - "" resources: - - olsconfigs/status + - secrets verbs: + - create + - delete + - deletecollection - get + - list - patch - update + - watch - apiGroups: - - operators.coreos.com + - "" resources: - - clusterserviceversions + - serviceaccounts verbs: + - create - get - list + - patch - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: manager-role - namespace: openshift-lightspeed -rules: - apiGroups: - - operators.coreos.com + - "" resources: - - clusterserviceversions + - services verbs: - - delete + - create + - get + - list - patch - update + - watch - apiGroups: - - operators.coreos.com + - apps resources: - - installplans + - deployments verbs: - - delete + - create - get - list + - patch - update - watch - apiGroups: - - operators.coreos.com + - networking.k8s.io resources: - - subscriptions + - networkpolicies verbs: - create - - delete - get - list - patch - update - watch +- apiGroups: + - operators.coreos.com + resources: + - clusterserviceversions + verbs: + - delete + - patch + - update diff --git a/hack/env.sh b/hack/env.sh new file mode 
100644 index 00000000..73ed18e0 --- /dev/null +++ b/hack/env.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export RELATED_IMAGE_LCORE_IMAGE_URL_DEFAULT="quay.io/lightspeed-core/lightspeed-stack:latest" +export RELATED_IMAGE_EXPORTER_IMAGE_URL_DEFAULT="quay.io/lightspeed-core/lightspeed-to-dataverse-exporter:latest" +export RELATED_IMAGE_POSTGRES_IMAGE_URL_DEFAULT="registry.redhat.io/rhel9/postgresql-16:latest" +export RELATED_IMAGE_OPENSTACK_LIGHTSPEED_IMAGE_URL_DEFAULT=quay.io/openstack-lightspeed/rag-content:os-docs-2025.2 +export WATCH_NAMESPACE="openstack-lightspeed" diff --git a/internal/controller/assets/postgres.conf b/internal/controller/assets/postgres.conf new file mode 100644 index 00000000..67446efe --- /dev/null +++ b/internal/controller/assets/postgres.conf @@ -0,0 +1,5 @@ +huge_pages = off +ssl = on +ssl_cert_file = '/etc/certs/tls.crt' +ssl_key_file = '/etc/certs/tls.key' +ssl_ca_file = '/etc/certs/cm-olspostgresca/service-ca.crt' diff --git a/internal/controller/assets/postgres_bootstrap.sh b/internal/controller/assets/postgres_bootstrap.sh new file mode 100644 index 00000000..7bda6ce3 --- /dev/null +++ b/internal/controller/assets/postgres_bootstrap.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +cat /var/lib/pgsql/data/userdata/postgresql.conf + +echo "attempting to create llama-stack database and pg_trgm extension if they do not exist" + +_psql () { psql --set ON_ERROR_STOP=1 "$@" ; } + +# Create database for llama-stack conversation storage +DB_NAME="llamastack" + +echo "SELECT 'CREATE DATABASE $DB_NAME' WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = '$DB_NAME')\gexec" | _psql -d $POSTGRESQL_DATABASE + +# Create pg_trgm extension in default database (for OpenStack Lightspeed conversation cache) +echo "CREATE EXTENSION IF NOT EXISTS pg_trgm;" | _psql -d $POSTGRESQL_DATABASE + +# Create pg_trgm extension in llama-stack database (for text search if needed) +echo "CREATE EXTENSION IF NOT EXISTS pg_trgm;" | _psql -d $DB_NAME + +# Create schemas for 
isolating different components' data +echo "CREATE SCHEMA IF NOT EXISTS lcore;" | _psql -d $POSTGRESQL_DATABASE +echo "CREATE SCHEMA IF NOT EXISTS quota;" | _psql -d $POSTGRESQL_DATABASE +echo "CREATE SCHEMA IF NOT EXISTS conversation_cache;" | _psql -d $POSTGRESQL_DATABASE diff --git a/internal/controller/system_prompt.txt b/internal/controller/assets/system_prompt.txt similarity index 100% rename from internal/controller/system_prompt.txt rename to internal/controller/assets/system_prompt.txt diff --git a/internal/controller/common.go b/internal/controller/common.go new file mode 100644 index 00000000..183af712 --- /dev/null +++ b/internal/controller/common.go @@ -0,0 +1,176 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + _ "embed" + "errors" + "fmt" + "strings" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + k8s_errors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/config" +) + +// toPtr returns a pointer to the given value. +func toPtr[T any](v T) *T { + return &v +} + +// getRawClient returns a raw client that is not restricted to WATCH_NAMESPACE. +// This is useful for operations that need to query resources across all namespaces +// cluster wide. 
+func getRawClient(helper *common_helper.Helper) (client.Client, error) { + cfg, err := config.GetConfig() + if err != nil { + return nil, err + } + + rawClient, err := client.New(cfg, client.Options{Scheme: helper.GetScheme()}) + if err != nil { + return nil, err + } + + return rawClient, nil +} + +// generateAppServerSelectorLabels returns a map of labels used as selectors +// for the application server pods. +func generateAppServerSelectorLabels() map[string]string { + return map[string]string{ + "app.kubernetes.io/component": "app-server", + "app.kubernetes.io/managed-by": "openstack-lightspeed-operator", + "app.kubernetes.io/name": "openstack-lightspeed-app-server", + "app.kubernetes.io/part-of": "openstack-lightspeed", + } +} + +// getConfigMapResourceVersion retrieves the resource version of a ConfigMap. +func getConfigMapResourceVersion(ctx context.Context, h *common_helper.Helper, name string, namespace string) (string, error) { + rawClient, err := getRawClient(h) + if err != nil { + return "", fmt.Errorf("failed to get raw client: %w", err) + } + + cm := &corev1.ConfigMap{} + err = rawClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cm) + if err != nil { + return "", fmt.Errorf("failed to get configmap %s: %w", name, err) + } + return cm.ResourceVersion, nil +} + +// providerNameToEnvVarName converts a provider name to a valid environment variable name. +// It uppercases the string and replaces hyphens and dots with underscores. +func providerNameToEnvVarName(providerName string) string { + name := strings.ToUpper(providerName) + name = strings.ReplaceAll(name, "-", "_") + name = strings.ReplaceAll(name, ".", "_") + return name +} + +// getPostgresCAConfigVolume returns a Volume for the Postgres CA certificate ConfigMap. 
+func getPostgresCAConfigVolume() corev1.Volume { + defaultMode := VolumeDefaultMode + return corev1.Volume{ + Name: PostgresCAVolume, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: OpenStackLightspeedCAConfigMap, + }, + DefaultMode: &defaultMode, + }, + }, + } +} + +// getPostgresCAVolumeMount returns a VolumeMount for the Postgres CA certificate. +func getPostgresCAVolumeMount() corev1.VolumeMount { + return corev1.VolumeMount{ + Name: PostgresCAVolume, + MountPath: OpenStackLightspeedAppCertsMountRoot + "/postgres-ca", + ReadOnly: true, + } +} + +// getPostgresCAVolumeMountWithPath returns a VolumeMount for the Postgres CA certificate +// at the specified mount path. Used by the postgres container itself. +func getPostgresCAVolumeMountWithPath(mountPath string) corev1.VolumeMount { + return corev1.VolumeMount{ + Name: PostgresCAVolume, + MountPath: mountPath, + ReadOnly: true, + } +} + +// generatePostgresSelectorLabels returns selector labels for Postgres components. +func generatePostgresSelectorLabels() map[string]string { + return map[string]string{ + "app.kubernetes.io/component": "postgres-server", + "app.kubernetes.io/managed-by": "openstack-lightspeed-operator", + "app.kubernetes.io/name": "openstack-lightspeed-service-postgres", + "app.kubernetes.io/part-of": "openstack-lightspeed", + } +} + +// getResourcesOrDefault returns the provided resource requirements if non-nil, +// otherwise returns the given default resource requirements. +func getResourcesOrDefault(custom *corev1.ResourceRequirements, defaults corev1.ResourceRequirements) corev1.ResourceRequirements { + if custom != nil { + return *custom + } + return defaults +} + +// isDeploymentReady checks whether the provided deployment is ready by verifying +// that the deployment's observed generation matches the current generation and +// all replicas (updated, available, and total) match the desired count. 
+func isDeploymentReady(deploy *appsv1.Deployment) bool { + if deploy.Generation > deploy.Status.ObservedGeneration { + return false + } + + replicas := int32(1) + if deploy.Spec.Replicas != nil { + replicas = *deploy.Spec.Replicas + } + + return deploy.Status.UpdatedReplicas == replicas && + deploy.Status.AvailableReplicas == replicas && + deploy.Status.Replicas == replicas +} + +// getDeployment retrieves deployment from the cluster +func getDeployment(ctx context.Context, h *common_helper.Helper, name string, namespace string) (*appsv1.Deployment, error) { + deployment := &appsv1.Deployment{} + err := h.GetClient().Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, deployment) + if err != nil { + if k8s_errors.IsNotFound(err) { + return &appsv1.Deployment{}, errors.New("deployment not found") + } + return &appsv1.Deployment{}, fmt.Errorf("failed to get deployment %s: %w", name, err) + } + + return deployment, nil +} diff --git a/internal/controller/constants.go b/internal/controller/constants.go new file mode 100644 index 00000000..4c390d4a --- /dev/null +++ b/internal/controller/constants.go @@ -0,0 +1,121 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + _ "embed" + "time" +) + +const ( + // Volume Permissions + VolumeDefaultMode = int32(420) + VolumeRestrictedMode = int32(0600) + + // Operator Settings + ResourceCreationTimeout = 60 * time.Second + + // Application Server + OpenStackLightspeedAppServerServiceAccountName = "lightspeed-app-server" + OpenStackLightspeedAppServerSARRoleName = OpenStackLightspeedAppServerServiceAccountName + "-sar-role" + OpenStackLightspeedAppServerSARRoleBindingName = OpenStackLightspeedAppServerSARRoleName + "-binding" + OpenStackLightspeedAppServerContainerPort = 8443 + OpenStackLightspeedAppServerServicePort = 8443 + OpenStackLightspeedAppServerServiceName = "lightspeed-app-server" + OpenStackLightspeedAppServerNetworkPolicyName = "lightspeed-app-server" + OpenStackLightspeedCertsSecretName = "lightspeed-tls" + OpenStackLightspeedDefaultProvider = "openstack-lightspeed-provider" + OpenStackLightspeedVectorDBPath = "/rag/vector_db/os_product_docs" + + ServingCertSecretAnnotationKey = "service.beta.openshift.io/serving-cert-secret-name" + + // Monitoring + MetricsReaderServiceAccountTokenSecretName = "metrics-reader-token" + MetricsReaderServiceAccountName = "lightspeed-operator-metrics-reader" + + // Cert / CA + OpenStackLightspeedAppCertsMountRoot = "/etc/certs" + OpenStackLightspeedCAConfigMap = "openshift-service-ca.crt" + OpenShiftCAVolumeName = "openshift-ca" + AdditionalCAVolumeName = "additional-ca" + AdditionalCACertFile = "cert.crt" + + // Postgres + PostgresCAVolume = "cm-olspostgresca" + PostgresDeploymentName = "lightspeed-postgres-server" + PostgresServiceName = "lightspeed-postgres-server" + PostgresSecretName = "lightspeed-postgres-secret" + PostgresCertsSecretName = "lightspeed-postgres-certs" + PostgresBootstrapSecretName = "lightspeed-postgres-bootstrap" + PostgresConfigMapName = "lightspeed-postgres-conf" + PostgresNetworkPolicyName = "lightspeed-postgres-server" + PostgresServicePort = int32(5432) + PostgresDefaultUser = 
"postgres" + PostgresDefaultDbName = "postgres" + PostgresDefaultSSLMode = "require" + PostgresSharedBuffers = "256MB" + PostgresMaxConnections = 100 + OpenStackLightspeedComponentPasswordFileName = "password" + PostgresExtensionScript = "create-extensions.sh" + PostgresConfigKey = "postgresql.conf.sample" + PostgresBootstrapVolumeMountPath = "/usr/share/container-scripts/postgresql/start/create-extensions.sh" + PostgresConfigVolumeMountPath = "/usr/share/pgsql/postgresql.conf.sample" + PostgresDataVolume = "postgres-data" + PostgresDataVolumeMountPath = "/var/lib/pgsql" + PostgresVarRunVolumeName = "lightspeed-postgres-var-run" + PostgresVarRunVolumeMountPath = "/var/run/postgresql" + TmpVolumeName = "tmp-writable-volume" + TmpVolumeMountPath = "/tmp" + PostgresConfigMapResourceVersionAnnotation = "ols.openshift.io/postgres-configmap-version" + + // LCore specific + LlamaStackContainerPort = int32(8321) + LlamaStackConfigCmName = "llama-stack-config" + LCoreConfigCmName = "lightspeed-stack-config" + LCoreDeploymentName = "lightspeed-stack-deployment" + LlamaStackConfigMountPath = "/app-root/run.yaml" + LCoreConfigMountPath = "/app-root/lightspeed-stack.yaml" + LlamaStackConfigFilename = "run.yaml" + LCoreConfigFilename = "lightspeed-stack.yaml" + LCoreConfigMapResourceVersionAnnotation = "ols.openshift.io/lcore-configmap-version" + LlamaStackConfigMapResourceVersionAnnotation = "ols.openshift.io/llamastack-configmap-version" + LCoreUserDataMountPath = "/tmp/data" + ForceReloadAnnotationKey = "ols.openshift.io/force-reload" + + // Data Exporter + ExporterConfigVolumeName = "exporter-config" + ExporterConfigMountPath = "/etc/config" + ExporterConfigFilename = "config.yaml" + RHOSOLightspeedOwnerIDLabel = "openstack.org/lightspeed-owner-id" + ServiceIDRHOSO = "rhos-lightspeed" + + // Azure + AzureOpenAIType = "azure_openai" + + // EnvVarSuffixAPIKey is the environment variable suffix for API key credentials + EnvVarSuffixAPIKey = "_API_KEY" +) + +// PostgreSQL 
Bootstrap Script - creates database, extensions, and schemas +// +//go:embed assets/postgres_bootstrap.sh +var PostgresBootStrapScriptContent string + +// PostgreSQL Configuration - SSL and TLS settings +// +//go:embed assets/postgres.conf +var PostgresConfigMapContent string diff --git a/internal/controller/errors.go b/internal/controller/errors.go new file mode 100644 index 00000000..53658539 --- /dev/null +++ b/internal/controller/errors.go @@ -0,0 +1,49 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import "errors" + +var ( + // Lcore Errors + ErrCreateAPIConfigmap = errors.New("failed to create OpenStack Lightspeed configmap") + ErrCreateAPIDeployment = errors.New("failed to create OpenStack Lightspeed deployment") + ErrCreateAPIService = errors.New("failed to create OpenStack Lightspeed service") + ErrCreateAPIServiceAccount = errors.New("failed to create OpenStack Lightspeed service account") + ErrCreateAppServerNetworkPolicy = errors.New("failed to create AppServer network policy") + ErrCreateSARClusterRole = errors.New("failed to create SAR cluster role") + ErrCreateSARClusterRoleBinding = errors.New("failed to create SAR cluster role binding") + ErrDeleteSARClusterRole = errors.New("failed to delete SAR cluster role") + ErrDeleteSARClusterRoleBinding = errors.New("failed to delete SAR cluster role binding") + ErrGenerateAPIConfigmap = errors.New("failed to generate OpenStack Lightspeed configmap") + ErrGetAdditionalCACM = errors.New("failed to get additional CA configmap") + ErrGetProxyCACM = errors.New("failed to get proxy CA configmap") + ErrGetTLSSecret = errors.New("failed to get TLS secret") + ErrCreateLlamaStackConfigMap = errors.New("failed to create Llama Stack configmap") + ErrGenerateLlamaStackConfigMap = errors.New("failed to generate Llama Stack configmap") + + // Postgres Errors + ErrCreatePostgresDeployment = errors.New("failed to create Postgres deployment") + ErrCreatePostgresService = errors.New("failed to create Postgres service") + ErrGeneratePostgresSecret = errors.New("failed to generate Postgres secret") + ErrCreatePostgresSecret = errors.New("failed to create Postgres secret") + ErrGetPostgresSecret = errors.New("failed to get Postgres secret") + ErrCreatePostgresBootstrapSecret = errors.New("failed to create Postgres bootstrap secret") + ErrCreatePostgresConfigMap = errors.New("failed to create Postgres configmap") + ErrGetPostgresConfigMap = errors.New("failed to get Postgres configmap") + 
ErrCreatePostgresNetworkPolicy = errors.New("failed to create Postgres network policy") +) diff --git a/internal/controller/funcs.go b/internal/controller/funcs.go deleted file mode 100644 index 2be67ed9..00000000 --- a/internal/controller/funcs.go +++ /dev/null @@ -1,364 +0,0 @@ -/* -Copyright 2025. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package controller - -import ( - "context" - "fmt" - "math/rand" - "strconv" - - apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/config" - - _ "embed" - - common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - k8s_errors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" -) - -const ( - // OpenStackLightspeedDefaultProvider - contains default name for the provider created in OLSConfig - // by openstack-operator. - OpenStackLightspeedDefaultProvider = "openstack-lightspeed-provider" - - // OpenStackLightspeedOwnerIDLabel - name of a label that contains ID of OpenStackLightspeed instance - // that manages the OLSConfig. 
- OpenStackLightspeedOwnerIDLabel = "openstack.org/lightspeed-owner-id" - - // OpenStackLightspeedVectorDBPath - path inside of the container image where the vector DB are - // located - OpenStackLightspeedVectorDBPath = "/rag/vector_db/os_product_docs" - - // OpenStackLightspeedJobName - name of the pod that is used to discover environment variables inside of the RAG - // container image - OpenStackLightspeedJobName = "openstack-lightspeed" - - // OLSConfigName - OLS forbids other name for OLSConfig instance than OLSConfigName - OLSConfigName = "cluster" -) - -// systemPrompt - system prompt tailored to the needs of OpenStack Lightspeed. It overwrites the default OLS prompt. -// -//go:embed system_prompt.txt -var systemPrompt string - -// GetSystemPrompt returns the OpenStackLightspeed system prompt -func GetSystemPrompt() string { - return systemPrompt -} - -// RemoveOLSConfig attempts to remove the OLSConfig custom resource if it exists -// and is managed by the given OpenStackLightspeed instance. It first fetches the OLSConfig, -// checks whether the current OpenStackLightspeed instance is the owner (via label check), -// and if so, removes the finalizer and deletes the OLSConfig resource. -// Returns (true, nil) if the OLSConfig is not found (indicating it has already been deleted). -// Returns (true, nil) if the resource was deleted successfully, or (false, error) if any error occurs. 
-func RemoveOLSConfig( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - olsConfig, err := GetOLSConfig(ctx, helper) - if err != nil && !k8s_errors.IsNotFound(err) { - return false, err - } else if err != nil && k8s_errors.IsNotFound(err) { - return true, nil - } - - _, err = controllerutil.CreateOrPatch(ctx, helper.GetClient(), &olsConfig, func() error { - ownerLabel := olsConfig.GetLabels()[OpenStackLightspeedOwnerIDLabel] - isInstanceOwnedOLSConfig := ownerLabel == string(instance.GetObjectMeta().GetUID()) - - if ownerLabel == "" || !isInstanceOwnedOLSConfig { - helper.GetLogger().Info("Skipping OLSConfig deletion as it is not managed by the OpenStackLightspeed instance") - return nil - } - - if ok := controllerutil.RemoveFinalizer(&olsConfig, helper.GetFinalizer()); !ok { - return fmt.Errorf("remove finalizer failed") - } - - return nil - }) - if err != nil { - return false, err - } - - err = helper.GetClient().Delete(ctx, &olsConfig) - if err != nil { - return false, err - } - - _, err = GetOLSConfig(ctx, helper) - if err != nil && k8s_errors.IsNotFound(err) { - return true, nil - } else if err != nil { - return false, err - } - - return false, nil -} - -// GetOLSConfig returns OLSConfig if there is one present in the cluster. 
-func GetOLSConfig(ctx context.Context, helper *common_helper.Helper) (uns.Unstructured, error) { - OLSConfigGVR := schema.GroupVersionResource{ - Group: "ols.openshift.io", - Version: "v1alpha1", - Resource: "olsconfigs", - } - - OLSConfigList := &uns.UnstructuredList{} - OLSConfigList.SetGroupVersionKind(OLSConfigGVR.GroupVersion().WithKind("OLSConfig")) - err := helper.GetClient().List(ctx, OLSConfigList) - if err != nil { - return uns.Unstructured{}, err - } - - if len(OLSConfigList.Items) > 0 { - return OLSConfigList.Items[0], nil - } - - return uns.Unstructured{}, k8s_errors.NewNotFound( - schema.GroupResource{Group: "ols.openshifg.io", Resource: "olsconfigs"}, - "OLSConfig") -} - -// BuildRAGConfigs builds the RAG configuration array. -// OpenStack RAG is always included first. -// OCP RAG is added if ocpVersion is provided. -func BuildRAGConfigs(instance *apiv1beta1.OpenStackLightspeed, ocpVersion string) []interface{} { - rags := []interface{}{ - // OpenStack RAG - map[string]interface{}{ - "image": instance.Spec.RAGImage, - "indexPath": OpenStackLightspeedVectorDBPath, - }, - } - - // Add OCP RAG if enabled - if ocpVersion != "" { - rags = append(rags, map[string]interface{}{ - "image": instance.Spec.RAGImage, - "indexPath": GetOCPVectorDBPath(ocpVersion), - "indexID": GetOCPIndexName(ocpVersion), - }) - } - - return rags -} - -// PatchOLSConfig patches OLSConfig with information from OpenStackLightspeed instance. 
-func PatchOLSConfig( - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, - olsConfig *uns.Unstructured, -) error { - // Patch the Providers section - providersPatch := []interface{}{ - map[string]interface{}{ - "credentialsSecretRef": map[string]interface{}{ - "name": instance.Spec.LLMCredentials, - }, - "models": []interface{}{ - map[string]interface{}{ - "name": instance.Spec.ModelName, - "parameters": map[string]interface{}{ - "maxTokensForResponse": float64(instance.Spec.MaxTokensForResponse), // unstructured JSON numbers default to float64 - }, - }, - }, - "name": OpenStackLightspeedDefaultProvider, - "type": instance.Spec.LLMEndpointType, - "url": instance.Spec.LLMEndpoint, - }, - } - - provider := providersPatch[0].(map[string]interface{}) - if instance.Spec.LLMProjectID != "" { - if err := uns.SetNestedField(provider, instance.Spec.LLMProjectID, "projectID"); err != nil { - return err - } - } - - if instance.Spec.LLMDeploymentName != "" { - if err := uns.SetNestedField(provider, instance.Spec.LLMDeploymentName, "deploymentName"); err != nil { - return err - } - } - - if instance.Spec.LLMAPIVersion != "" { - if err := uns.SetNestedField(provider, instance.Spec.LLMAPIVersion, "apiVersion"); err != nil { - return err - } - } - - if err := uns.SetNestedSlice(olsConfig.Object, providersPatch, "spec", "llm", "providers"); err != nil { - return err - } - - // Patch the RAG section - // Build RAG array with priorities using BuildRAGConfigs - ragConfigs := BuildRAGConfigs(instance, instance.Status.ActiveOCPRAGVersion) - - if err := uns.SetNestedSlice(olsConfig.Object, ragConfigs, "spec", "ols", "rag"); err != nil { - return err - } - - if instance.Spec.TLSCACertBundle != "" { - tlsCaCertBundle := instance.Spec.TLSCACertBundle - err := uns.SetNestedField(olsConfig.Object, tlsCaCertBundle, "spec", "ols", "additionalCAConfigMapRef", "name") - if err != nil { - return err - } - } - - modelName := instance.Spec.ModelName - err := 
uns.SetNestedField(olsConfig.Object, modelName, "spec", "ols", "defaultModel") - if err != nil { - return err - } - - err = uns.SetNestedField(olsConfig.Object, OpenStackLightspeedDefaultProvider, "spec", "ols", "defaultProvider") - if err != nil { - return err - } - - // Disable the OCP RAG - // TODO(lucasagomes): Remove this once we have a "query router" that can - // handle multiple RAGs nicely - err = uns.SetNestedField(olsConfig.Object, true, "spec", "ols", "byokRAGOnly") - if err != nil { - return err - } - - // Disable or enable feedback collection - err = uns.SetNestedField(olsConfig.Object, instance.Spec.FeedbackDisabled, "spec", "ols", "userDataCollection", "feedbackDisabled") - if err != nil { - return err - } - - // Disable or enable transcripts collection - err = uns.SetNestedField(olsConfig.Object, instance.Spec.TranscriptsDisabled, "spec", "ols", "userDataCollection", "transcriptsDisabled") - if err != nil { - return err - } - - err = uns.SetNestedField(olsConfig.Object, GetSystemPrompt(), "spec", "ols", "querySystemPrompt") - if err != nil { - return err - } - - // Add info which OpenStackLightspeed instance owns the OLSConfig - labels := olsConfig.GetLabels() - updatedLabels := map[string]interface{}{ - OpenStackLightspeedOwnerIDLabel: string(instance.GetUID()), - } - for k, v := range labels { - updatedLabels[k] = v - } - - err = uns.SetNestedField(olsConfig.Object, updatedLabels, "metadata", "labels") - if err != nil { - return err - } - - // Add OpenStack finalizers - if !controllerutil.AddFinalizer(olsConfig, helper.GetFinalizer()) && instance.Status.Conditions == nil { - return fmt.Errorf("cannot add finalizer") - } - - return nil -} - -// IsOLSConfigReady returns true if OLSConfig's overallStatus is Ready -func IsOLSConfigReady(ctx context.Context, helper *common_helper.Helper) (bool, error) { - olsConfig, err := GetOLSConfig(ctx, helper) - if err != nil { - return false, err - } - - overallStatus, found, err := 
uns.NestedString(olsConfig.Object, "status", "overallStatus") - if err != nil { - return false, err - } - - if !found || overallStatus != "Ready" { - return false, OLSConfigPing(ctx, helper) - } - - return true, nil -} - -// IsOwnedBy returns true if 'object' is owned by 'owner' based on OwnerReference UID. -func IsOwnedBy(object metav1.Object, owner metav1.Object) bool { - for _, ref := range object.GetOwnerReferences() { - if ref.UID == owner.GetUID() { - return true - } - } - return false -} - -// GetRawClient returns a raw client that is not restricted to WATCH_NAMESPACE. -// This is useful for operations that need to query resources across all namespaces -// cluster wide. -func GetRawClient(helper *common_helper.Helper) (client.Client, error) { - cfg, err := config.GetConfig() - if err != nil { - return nil, err - } - - rawClient, err := client.New(cfg, client.Options{Scheme: helper.GetScheme()}) - if err != nil { - return nil, err - } - - return rawClient, nil -} - -// OLSConfigPing adds a random label to the OLSConfig to trigger a reconciliation -// by the OpenShift Lightspeed operator. This causes the operator to update the Status field. -// Note: This is a workaround for a current limitation—when the OLS operator is installed -// in the openstack-lightspeed namespace, it does not automatically update the OLSConfig -// status as expected. 
-func OLSConfigPing(ctx context.Context, helper *common_helper.Helper) error { - const randomLabelKey = "openstack-lightspeed/ping" - - olsConfig, err := GetOLSConfig(ctx, helper) - if err != nil { - return err - } - - labels := olsConfig.GetLabels() - if labels == nil { - labels = make(map[string]string) - } - - labels[randomLabelKey] = strconv.Itoa(rand.Int()) - olsConfig.SetLabels(labels) - - if err := helper.GetClient().Update(ctx, &olsConfig); err != nil { - return err - } - return nil -} diff --git a/internal/controller/lcore_config.go b/internal/controller/lcore_config.go new file mode 100644 index 00000000..42bbe43f --- /dev/null +++ b/internal/controller/lcore_config.go @@ -0,0 +1,223 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + _ "embed" + "fmt" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" + "sigs.k8s.io/yaml" +) + +// systemPrompt - system prompt tailored to the needs of OpenStack Lightspeed. +// +//go:embed assets/system_prompt.txt +var systemPrompt string + +// getSystemPrompt returns the OpenStackLightspeed system prompt +func getSystemPrompt() string { + return systemPrompt +} + +// lcoreProvider represents an LLM provider configuration. 
+type lcoreProvider struct { + Name string + URL string + Type string + CredentialsSecret string + Models []lcoreModel + AzureDeploymentName string + APIVersion string + WatsonProjectID string +} + +// lcoreModel represents a model configuration. +type lcoreModel struct { + Name string + MaxTokensForResponse int +} + +// lcoreRAG represents RAG configuration. +type lcoreRAG struct { + Image string + IndexPath string + IndexID string +} + +// buildProvider creates an lcoreProvider from an OpenStackLightspeed instance. +func buildProvider(instance *apiv1beta1.OpenStackLightspeed) lcoreProvider { + return lcoreProvider{ + Name: OpenStackLightspeedDefaultProvider, + URL: instance.Spec.LLMEndpoint, + Type: instance.Spec.LLMEndpointType, + CredentialsSecret: instance.Spec.LLMCredentials, + Models: []lcoreModel{ + { + Name: instance.Spec.ModelName, + MaxTokensForResponse: instance.Spec.MaxTokensForResponse, + }, + }, + AzureDeploymentName: instance.Spec.LLMDeploymentName, + APIVersion: instance.Spec.LLMAPIVersion, + WatsonProjectID: instance.Spec.LLMProjectID, + } +} + +// buildLCoreRAGConfigs builds the RAG configuration from an OpenStackLightspeed instance. 
+func buildLCoreRAGConfigs(instance *apiv1beta1.OpenStackLightspeed, ocpVersion string) []lcoreRAG { + rags := []lcoreRAG{ + { + Image: instance.Spec.RAGImage, + IndexPath: OpenStackLightspeedVectorDBPath, + }, + } + + if ocpVersion != "" { + rags = append(rags, lcoreRAG{ + Image: instance.Spec.RAGImage, + IndexPath: GetOCPVectorDBPath(ocpVersion), + IndexID: GetOCPIndexName(ocpVersion), + }) + } + + return rags +} + +func buildLCoreServiceConfig(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + "host": "0.0.0.0", + "port": OpenStackLightspeedAppServerContainerPort, + "auth_enabled": true, + "workers": 1, + "color_log": false, + "access_log": true, + "tls_config": map[string]interface{}{ + "tls_certificate_path": "/etc/certs/lightspeed-tls/tls.crt", + "tls_key_path": "/etc/certs/lightspeed-tls/tls.key", + }, + } +} + +func buildLCoreLlamaStackConfig() map[string]interface{} { + llamaStackConfig := map[string]interface{}{ + "use_as_library_client": false, + "url": fmt.Sprintf("http://localhost:%d", LlamaStackContainerPort), + } + + return llamaStackConfig +} + +func buildLCoreUserDataCollectionConfig(_ *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + feedbackEnabled := !instance.Spec.FeedbackDisabled + transcriptsEnabled := !instance.Spec.TranscriptsDisabled + + return map[string]interface{}{ + "feedback_enabled": feedbackEnabled, + "feedback_storage": LCoreUserDataMountPath + "/feedback", + "transcripts_enabled": transcriptsEnabled, + "transcripts_storage": LCoreUserDataMountPath + "/transcripts", + } +} + +func buildLCoreAuthenticationConfig(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + "module": "k8s", + } +} + +func buildLCoreInferenceConfig(_ *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + 
"default_provider": OpenStackLightspeedDefaultProvider, + "default_model": instance.Spec.ModelName, + } +} + +// buildLCoreDatabaseConfig configures persistent database storage (PostgreSQL) +func buildLCoreDatabaseConfig(h *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + "postgres": map[string]interface{}{ + "host": PostgresServiceName + "." + h.GetBeforeObject().GetNamespace() + ".svc", + "port": PostgresServicePort, + "db": PostgresDefaultDbName, + "user": PostgresDefaultUser, + "ssl_mode": PostgresDefaultSSLMode, + "gss_encmode": "disable", + "ca_cert_path": "/etc/certs/postgres-ca/service-ca.crt", + + // Environment variable substitution via llama_stack.core.stack.replace_env_vars + "password": "${env.POSTGRES_PASSWORD}", + + // Separate schema for LCore to avoid conflicts with App Server + "namespace": "lcore", + }, + } +} + +// buildLCoreCustomizationConfig configures system prompt customization +// Always uses the embedded default prompt from getSystemPrompt(); no per-instance override is read here +func buildLCoreCustomizationConfig() map[string]interface{} { + return map[string]interface{}{ + "system_prompt": getSystemPrompt(), + // Prevent users from overriding via API + "disable_query_system_prompt": true, + } +} + +// buildLCoreConversationCacheConfig configures chat history caching (PostgreSQL) +func buildLCoreConversationCacheConfig(h *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + "type": "postgres", + "postgres": map[string]interface{}{ + "host": PostgresServiceName + "." 
+ h.GetBeforeObject().GetNamespace() + ".svc", + "port": PostgresServicePort, + "db": PostgresDefaultDbName, + "user": PostgresDefaultUser, + "password": "${env.POSTGRES_PASSWORD}", + "ssl_mode": PostgresDefaultSSLMode, + "gss_encmode": "disable", + "ca_cert_path": "/etc/certs/postgres-ca/service-ca.crt", + "namespace": "conversation_cache", + }, + } +} + +// buildLCoreConfigYAML assembles the complete Lightspeed Core Service configuration and converts to YAML. +// NOTE: MCP servers, quota handlers, and tools approval features are disabled for OpenStack Lightspeed. +func buildLCoreConfigYAML(h *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed) (string, error) { + // Build the complete config as a map + config := map[string]interface{}{ + "name": "Lightspeed Core Service (LCS)", + "service": buildLCoreServiceConfig(h, instance), + "llama_stack": buildLCoreLlamaStackConfig(), + "user_data_collection": buildLCoreUserDataCollectionConfig(h, instance), + "authentication": buildLCoreAuthenticationConfig(h, instance), + "inference": buildLCoreInferenceConfig(h, instance), + "database": buildLCoreDatabaseConfig(h, instance), + "customization": buildLCoreCustomizationConfig(), + "conversation_cache": buildLCoreConversationCacheConfig(h, instance), + } + + // Convert to YAML + yamlBytes, err := yaml.Marshal(config) + if err != nil { + return "", fmt.Errorf("failed to marshal LCore config to YAML: %w", err) + } + + return string(yamlBytes), nil +} diff --git a/internal/controller/lcore_deployment.go b/internal/controller/lcore_deployment.go new file mode 100644 index 00000000..069003d1 --- /dev/null +++ b/internal/controller/lcore_deployment.go @@ -0,0 +1,491 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// buildLCorePodTemplateSpec builds the pod template spec for the LCore deployment. +// This function is used by CreateOrPatch to generate the desired pod spec. +func buildLCorePodTemplateSpec(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) (corev1.PodTemplateSpec, error) { + // Build shared volumes + volumes := []corev1.Volume{} + + // Llama Stack config volume (used by llama-stack container) + llamaVol, llamaMount := buildLlamaStackConfigVolumeAndMount(VolumeDefaultMode) + volumes = append(volumes, llamaVol) + + // LCore config volume (used by lightspeed-stack container) + lcoreVol, lcoreMount := buildLCoreConfigVolumeAndMount(VolumeDefaultMode) + volumes = append(volumes, lcoreVol) + + // Shared volumes - CA, postgres + sharedMounts := []corev1.VolumeMount{} + addOpenShiftCAVolumesAndMounts(&volumes, &sharedMounts, VolumeDefaultMode) + addOpenShiftRootCAVolumesAndMounts(&volumes, &sharedMounts, VolumeDefaultMode) + addPostgresCAVolumesAndMounts(&volumes, &sharedMounts) + addUserCAVolumesAndMounts(&volumes, &sharedMounts, instance, VolumeDefaultMode) + + // Llama cache emptydir + llamaCacheMounts := []corev1.VolumeMount{} + 
addLlamaCacheVolumesAndMounts(&volumes, &llamaCacheMounts) + + // Build env vars + llamaEnvVars, err := buildLlamaStackEnvVars(h, ctx, instance) + if err != nil { + return corev1.PodTemplateSpec{}, fmt.Errorf("failed to build llama-stack env vars: %w", err) + } + lsEnvVars := buildLightspeedStackEnvVars() + + // Llama Stack container mounts: its config + shared + cache + llamaStackMounts := []corev1.VolumeMount{llamaMount} + llamaStackMounts = append(llamaStackMounts, sharedMounts...) + llamaStackMounts = append(llamaStackMounts, llamaCacheMounts...) + + llamaStackContainer := corev1.Container{ + Name: "llama-stack", + Image: apiv1beta1.OpenStackLightspeedDefaultValues.LCoreImageURL, + Command: []string{"llama", "stack", "run", LlamaStackConfigMountPath}, + Ports: []corev1.ContainerPort{{Name: "llama-stack", ContainerPort: LlamaStackContainerPort}}, + VolumeMounts: llamaStackMounts, + Env: llamaEnvVars, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + TCPSocket: &corev1.TCPSocketAction{ + Port: intstr.FromInt32(LlamaStackContainerPort), + }, + }, + InitialDelaySeconds: 5, + PeriodSeconds: 10, + }, + Resources: getResourcesOrDefault(nil, corev1.ResourceRequirements{}), + ImagePullPolicy: corev1.PullIfNotPresent, + } + + // Lightspeed Stack container mounts: its config + shared + TLS (only API container needs TLS) + lightspeedStackMounts := []corev1.VolumeMount{lcoreMount} + lightspeedStackMounts = append(lightspeedStackMounts, sharedMounts...) + tlsMounts := []corev1.VolumeMount{} + addTLSVolumesAndMounts(&volumes, &tlsMounts, VolumeDefaultMode) + lightspeedStackMounts = append(lightspeedStackMounts, tlsMounts...) 
+ + lightspeedStackContainer := corev1.Container{ + Name: "lightspeed-service-api", + Image: apiv1beta1.OpenStackLightspeedDefaultValues.LCoreImageURL, + Ports: []corev1.ContainerPort{{Name: "https", ContainerPort: OpenStackLightspeedAppServerContainerPort}}, + VolumeMounts: lightspeedStackMounts, + Env: lsEnvVars, + LivenessProbe: buildLightspeedStackLivenessProbe(), + ReadinessProbe: buildLightspeedStackReadinessProbe(), + Resources: getResourcesOrDefault(nil, corev1.ResourceRequirements{}), + ImagePullPolicy: corev1.PullIfNotPresent, + } + + containers := []corev1.Container{llamaStackContainer, lightspeedStackContainer} + + // Build configmap resource version annotations for change detection + annotations, err := buildConfigMapAnnotations(h, ctx) + if err != nil { + return corev1.PodTemplateSpec{}, err + } + + return corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: generateAppServerSelectorLabels(), + Annotations: annotations, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: OpenStackLightspeedAppServerServiceAccountName, + Containers: containers, + Volumes: volumes, + }, + }, nil +} + +// buildLCoreConfigVolumeAndMount returns the volume and mount for the lightspeed-stack config. +func buildLCoreConfigVolumeAndMount(volumeDefaultMode int32) (corev1.Volume, corev1.VolumeMount) { + vol := corev1.Volume{ + Name: "lcore-config", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: LCoreConfigCmName, + }, + DefaultMode: toPtr(volumeDefaultMode), + }, + }, + } + mount := corev1.VolumeMount{ + Name: "lcore-config", + MountPath: LCoreConfigMountPath, + SubPath: LCoreConfigFilename, + ReadOnly: true, + } + return vol, mount +} + +// buildLlamaStackConfigVolumeAndMount returns the volume and mount for the llama-stack config. 
+func buildLlamaStackConfigVolumeAndMount(volumeDefaultMode int32) (corev1.Volume, corev1.VolumeMount) {
+	// The volume and its mount are linked by this shared name.
+	const volumeName = "llama-stack-config"
+
+	configVolume := corev1.Volume{Name: volumeName}
+	configVolume.VolumeSource.ConfigMap = &corev1.ConfigMapVolumeSource{
+		LocalObjectReference: corev1.LocalObjectReference{Name: LlamaStackConfigCmName},
+		DefaultMode:          toPtr(volumeDefaultMode),
+	}
+
+	// SubPath selects the single config file inside the configmap volume.
+	configMount := corev1.VolumeMount{
+		Name:      volumeName,
+		MountPath: LlamaStackConfigMountPath,
+		SubPath:   LlamaStackConfigFilename,
+		ReadOnly:  true,
+	}
+
+	return configVolume, configMount
+}
+
+// addTLSVolumesAndMounts appends the volume backed by the serving-certificate
+// secret (OpenStackLightspeedCertsSecretName) to volumes and its read-only
+// mount under the certs mount root to mounts.
+func addTLSVolumesAndMounts(volumes *[]corev1.Volume, mounts *[]corev1.VolumeMount, volumeDefaultMode int32) {
+	const volumeName = "tls-certs"
+
+	certVolume := corev1.Volume{Name: volumeName}
+	certVolume.VolumeSource.Secret = &corev1.SecretVolumeSource{
+		SecretName:  OpenStackLightspeedCertsSecretName,
+		DefaultMode: toPtr(volumeDefaultMode),
+	}
+	certMount := corev1.VolumeMount{
+		Name:      volumeName,
+		MountPath: OpenStackLightspeedAppCertsMountRoot + "/lightspeed-tls",
+		ReadOnly:  true,
+	}
+
+	*volumes = append(*volumes, certVolume)
+	*mounts = append(*mounts, certMount)
+}
+
+// addOpenShiftCAVolumesAndMounts appends the volume backed by the CA bundle
+// configmap (OpenStackLightspeedCAConfigMap) to volumes and its read-only
+// mount under the certs mount root to mounts.
+func addOpenShiftCAVolumesAndMounts(volumes *[]corev1.Volume, mounts *[]corev1.VolumeMount, volumeDefaultMode int32) {
+	caVolume := corev1.Volume{Name: OpenShiftCAVolumeName}
+	caVolume.VolumeSource.ConfigMap = &corev1.ConfigMapVolumeSource{
+		LocalObjectReference: corev1.LocalObjectReference{Name: OpenStackLightspeedCAConfigMap},
+		DefaultMode:          toPtr(volumeDefaultMode),
+	}
+	caMount := corev1.VolumeMount{
+		Name:      OpenShiftCAVolumeName,
+		MountPath: OpenStackLightspeedAppCertsMountRoot + "/openshift-ca",
+		ReadOnly:  true,
+	}
+
+	*volumes = append(*volumes, caVolume)
+	*mounts = append(*mounts, caMount)
+}
+
+// addOpenShiftRootCAVolumesAndMounts adds the OpenShift cluster-wide root CA bundle.
+func addOpenShiftRootCAVolumesAndMounts(volumes *[]corev1.Volume, mounts *[]corev1.VolumeMount, volumeDefaultMode int32) {
+	// The volume and its mount are linked by this shared name.
+	const volumeName = "openshift-root-ca"
+
+	rootCAVolume := corev1.Volume{Name: volumeName}
+	rootCAVolume.VolumeSource.ConfigMap = &corev1.ConfigMapVolumeSource{
+		LocalObjectReference: corev1.LocalObjectReference{Name: "kube-root-ca.crt"},
+		DefaultMode:          toPtr(volumeDefaultMode),
+	}
+	rootCAMount := corev1.VolumeMount{
+		Name:      volumeName,
+		MountPath: OpenStackLightspeedAppCertsMountRoot + "/openshift-root-ca",
+		ReadOnly:  true,
+	}
+
+	*volumes = append(*volumes, rootCAVolume)
+	*mounts = append(*mounts, rootCAMount)
+}
+
+// addPostgresCAVolumesAndMounts appends the Postgres CA volume and mount
+// produced by getPostgresCAConfigVolume and getPostgresCAVolumeMount.
+func addPostgresCAVolumesAndMounts(volumes *[]corev1.Volume, mounts *[]corev1.VolumeMount) {
+	postgresCAVolume := getPostgresCAConfigVolume()
+	*volumes = append(*volumes, postgresCAVolume)
+
+	postgresCAMount := getPostgresCAVolumeMount()
+	*mounts = append(*mounts, postgresCAMount)
+}
+
+// addLlamaCacheVolumesAndMounts appends an emptyDir volume and a writable
+// mount at /tmp/llama-stack for the llama-stack cache.
+func addLlamaCacheVolumesAndMounts(volumes *[]corev1.Volume, mounts *[]corev1.VolumeMount) {
+	const volumeName = "llama-cache"
+
+	cacheVolume := corev1.Volume{Name: volumeName}
+	cacheVolume.VolumeSource.EmptyDir = &corev1.EmptyDirVolumeSource{}
+
+	cacheMount := corev1.VolumeMount{
+		Name:      volumeName,
+		MountPath: "/tmp/llama-stack",
+	}
+
+	*volumes = append(*volumes, cacheVolume)
+	*mounts = append(*mounts, cacheMount)
+}
+
+// addUserCAVolumesAndMounts adds user-provided additional CA certificate volume and mount
+// if instance.Spec.TLSCACertBundle is set.
+func addUserCAVolumesAndMounts(volumes *[]corev1.Volume, mounts *[]corev1.VolumeMount, instance *apiv1beta1.OpenStackLightspeed, volumeDefaultMode int32) {
+	// Nothing to mount when the user did not reference a CA bundle configmap.
+	if instance.Spec.TLSCACertBundle == "" {
+		return
+	}
+	*volumes = append(*volumes, corev1.Volume{
+		Name: AdditionalCAVolumeName,
+		VolumeSource: corev1.VolumeSource{
+			ConfigMap: &corev1.ConfigMapVolumeSource{
+				LocalObjectReference: corev1.LocalObjectReference{
+					Name: instance.Spec.TLSCACertBundle,
+				},
+				DefaultMode: toPtr(volumeDefaultMode),
+			},
+		},
+	})
+	*mounts = append(*mounts, corev1.VolumeMount{
+		Name:      AdditionalCAVolumeName,
+		MountPath: OpenStackLightspeedAppCertsMountRoot + "/additional-ca",
+		ReadOnly:  true,
+	})
+}
+
+// buildAdditionalCAEnvVars returns the REQUESTS_CA_BUNDLE and SSL_CERT_FILE env vars,
+// both pointing at the additional CA cert file under the additional-ca mount.
+// It returns nil when instance.Spec.TLSCACertBundle is empty.
+func buildAdditionalCAEnvVars(instance *apiv1beta1.OpenStackLightspeed) []corev1.EnvVar {
+	if instance.Spec.TLSCACertBundle == "" {
+		return nil
+	}
+	// Must match the mount path used by addUserCAVolumesAndMounts.
+	certPath := OpenStackLightspeedAppCertsMountRoot + "/additional-ca/" + AdditionalCACertFile
+	return []corev1.EnvVar{
+		{
+			Name:  "REQUESTS_CA_BUNDLE",
+			Value: certPath,
+		},
+		{
+			Name:  "SSL_CERT_FILE",
+			Value: certPath,
+		},
+	}
+}
+
+// buildLlamaStackEnvVars builds the environment variables for the llama-stack
+// container: the provider credentials sourced from the provider's Kubernetes
+// secret (when one is configured), followed by the Postgres password, the
+// logging configuration and the additional-CA variables.
+//
+// For Azure providers the credentials secret is read so that every expected
+// variable is defined, with an empty value for keys the secret lacks. An error
+// is returned only when that secret cannot be fetched.
+func buildLlamaStackEnvVars(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) ([]corev1.EnvVar, error) {
+	envVars := []corev1.EnvVar{}
+
+	provider := buildProvider(instance)
+
+	// A provider without a credentials secret contributes no credential
+	// variables, but the common variables appended further down are still
+	// needed (the llama-stack config substitutes ${env.POSTGRES_PASSWORD}),
+	// so only this section is skipped instead of returning early.
+	if provider.CredentialsSecret != "" {
+		envVarName := providerNameToEnvVarName(provider.Name)
+
+		// fromSecret builds an env var that references a key of the
+		// provider credentials secret.
+		fromSecret := func(name, key string) corev1.EnvVar {
+			return corev1.EnvVar{
+				Name: name,
+				ValueFrom: &corev1.EnvVarSource{
+					SecretKeyRef: &corev1.SecretKeySelector{
+						LocalObjectReference: corev1.LocalObjectReference{
+							Name: provider.CredentialsSecret,
+						},
+						Key: key,
+					},
+				},
+			}
+		}
+
+		if provider.Type == AzureOpenAIType {
+			// Azure supports both API key and client credentials authentication.
+			// Read the secret to determine which fields are present.
+			secret := &corev1.Secret{}
+			err := h.GetClient().Get(ctx, types.NamespacedName{
+				Name:      provider.CredentialsSecret,
+				Namespace: h.GetBeforeObject().GetNamespace(),
+			}, secret)
+			if err != nil {
+				return nil, fmt.Errorf("failed to get Azure provider secret %s: %w", provider.CredentialsSecret, err)
+			}
+
+			// The API key comes first (always included - required by LiteLLM's
+			// Pydantic validation), then the Azure AD client credentials.
+			// Keys missing from the secret get an empty default so the
+			// variable still exists.
+			for _, field := range []struct {
+				secretKey string
+				envSuffix string
+			}{
+				{"apitoken", "_API_KEY"},
+				{"client_id", "_CLIENT_ID"},
+				{"tenant_id", "_TENANT_ID"},
+				{"client_secret", "_CLIENT_SECRET"},
+			} {
+				name := envVarName + field.envSuffix
+				if _, ok := secret.Data[field.secretKey]; ok {
+					envVars = append(envVars, fromSecret(name, field.secretKey))
+				} else {
+					envVars = append(envVars, corev1.EnvVar{
+						Name:  name,
+						Value: "",
+					})
+				}
+			}
+		} else {
+			// Non-Azure providers: single API_KEY from the "apitoken" key
+			envVars = append(envVars, fromSecret(envVarName+"_API_KEY", "apitoken"))
+
+			// For vLLM providers, also set the URL environment variable
+			// The vLLM adapter checks for VLLM_URL as a fallback if URL is not in config
+			isVLLM := provider.Type == "rhoai_vllm" || provider.Type == "rhelai_vllm"
+			if isVLLM && provider.URL != "" {
+				envVars = append(envVars, corev1.EnvVar{
+					Name:  "VLLM_URL",
+					Value: provider.URL,
+				})
+			}
+		}
+	}
+
+	// Postgres password for ${env.POSTGRES_PASSWORD} substitution in llama-stack config
+	envVars = append(envVars, buildPostgresPasswordEnvVar())
+
+	// Logging configuration
+	envVars = append(envVars, corev1.EnvVar{
+		Name:  "LLAMA_STACK_LOGGING",
+		Value: "all=info",
+	})
+
+	// Additional CA env vars
+	envVars = append(envVars, buildAdditionalCAEnvVars(instance)...)
+
+	return envVars, nil
+}
+
+// buildPostgresPasswordEnvVar returns the POSTGRES_PASSWORD env var, sourced from
+// the OpenStackLightspeedComponentPasswordFileName key of the PostgresSecretName secret.
+func buildPostgresPasswordEnvVar() corev1.EnvVar {
+	return corev1.EnvVar{
+		Name: "POSTGRES_PASSWORD",
+		ValueFrom: &corev1.EnvVarSource{
+			SecretKeyRef: &corev1.SecretKeySelector{
+				LocalObjectReference: corev1.LocalObjectReference{
+					Name: PostgresSecretName,
+				},
+				Key: OpenStackLightspeedComponentPasswordFileName,
+			},
+		},
+	}
+}
+
+// buildLightspeedStackEnvVars builds environment variables for the lightspeed-stack
+// container: a fixed LOG_LEVEL of INFO and the Postgres password.
+func buildLightspeedStackEnvVars() []corev1.EnvVar {
+	return []corev1.EnvVar{
+		{
+			Name:  "LOG_LEVEL",
+			Value: "INFO",
+		},
+		buildPostgresPasswordEnvVar(),
+	}
+}
+
+// buildLightspeedStackLivenessProbe returns the liveness probe for the lightspeed-stack
+// container: a TCP check against the app server container port.
+func buildLightspeedStackLivenessProbe() *corev1.Probe {
+	return &corev1.Probe{
+		ProbeHandler: corev1.ProbeHandler{
+			TCPSocket: &corev1.TCPSocketAction{
+				Port: intstr.FromInt32(OpenStackLightspeedAppServerContainerPort),
+			},
+		},
+		InitialDelaySeconds: 30,
+		PeriodSeconds:       10,
+		TimeoutSeconds:      5,
+		FailureThreshold:    3,
+	}
+}
+
+// buildLightspeedStackReadinessProbe returns the readiness probe for the lightspeed-stack container.
+func buildLightspeedStackReadinessProbe() *corev1.Probe {
+	readiness := &corev1.Probe{
+		InitialDelaySeconds: 30,
+		PeriodSeconds:       10,
+		TimeoutSeconds:      5,
+		FailureThreshold:    3,
+	}
+	// The probe is a TCP check against the app server container port.
+	readiness.ProbeHandler.TCPSocket = &corev1.TCPSocketAction{
+		Port: intstr.FromInt32(OpenStackLightspeedAppServerContainerPort),
+	}
+	return readiness
+}
+
+// buildConfigMapAnnotations returns pod annotations carrying the resource
+// versions of the LCore and Llama Stack configmaps, so that a change to either
+// configmap alters the pod template and triggers a deployment rollout.
+// A configmap that does not exist yet is skipped; any other lookup error is returned.
+func buildConfigMapAnnotations(h *common_helper.Helper, ctx context.Context) (map[string]string, error) {
+	namespace := h.GetBeforeObject().GetNamespace()
+	result := make(map[string]string)
+
+	lcoreVersion, lcoreErr := getConfigMapResourceVersion(ctx, h, LCoreConfigCmName, namespace)
+	switch {
+	case lcoreErr == nil:
+		result[LCoreConfigMapResourceVersionAnnotation] = lcoreVersion
+	case !errors.IsNotFound(lcoreErr):
+		// NotFound is tolerated: the configmap may not exist yet during initial creation.
+		return nil, fmt.Errorf("failed to get LCore configmap resource version: %w", lcoreErr)
+	}
+
+	llamaVersion, llamaErr := getConfigMapResourceVersion(ctx, h, LlamaStackConfigCmName, namespace)
+	switch {
+	case llamaErr == nil:
+		result[LlamaStackConfigMapResourceVersionAnnotation] = llamaVersion
+	case !errors.IsNotFound(llamaErr):
+		return nil, fmt.Errorf("failed to get Llama Stack configmap resource version: %w", llamaErr)
+	}
+
+	return result, nil
+}
diff --git a/internal/controller/lcore_reconciler.go b/internal/controller/lcore_reconciler.go
new file mode 100644
index 00000000..a922259a
--- /dev/null
+++ b/internal/controller/lcore_reconciler.go
@@ -0,0 +1,459 @@
+/*
+Copyright 2026.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "time" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +// ReconcileLCoreResources reconciles Phase 1 resources: service accounts, roles, +// config maps, secrets, and network policies. Uses a continue-on-error pattern +// so that all tasks are attempted even if some fail. 
+func ReconcileLCoreResources(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	// Tasks run in the order listed here.
+	return ReconcileTasks(h, ctx, instance, []ReconcileTask{
+		{Name: "ServiceAccount", Task: reconcileServiceAccount},
+		{Name: "SARRole", Task: reconcileSARRole},
+		{Name: "SARRoleBinding", Task: reconcileSARRoleBinding},
+		{Name: "LlamaStackConfigMap", Task: reconcileLlamaStackConfigMap},
+		{Name: "LcoreConfigMap", Task: reconcileLcoreConfigMap},
+		{Name: "OpenStackLightspeedAdditionalCAConfigMap", Task: reconcileOpenStackLightspeedAdditionalCAConfigMap},
+		{Name: "ProxyCAConfigMap", Task: reconcileProxyCAConfigMap},
+		{Name: "NetworkPolicy", Task: reconcileNetworkPolicy},
+	})
+}
+
+// ReconcileLCoreDeployment reconciles Phase 2 resources: the deployment, the
+// service, and the TLS secret provisioned for that service. The tasks are
+// handed to ReconcileTasksFailFast, so the first error stops execution.
+func ReconcileLCoreDeployment(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	// Order matters: the TLS secret is only waited for after the service is reconciled.
+	return ReconcileTasksFailFast(h, ctx, instance, []ReconcileTask{
+		{Name: "Deployment", Task: reconcileDeployment},
+		{Name: "Service", Task: reconcileService},
+		{Name: "TLSSecret", Task: reconcileTLSSecret},
+	})
+}
+
+// reconcileServiceAccount ensures the OpenStack Lightspeed app server service account exists.
+func reconcileServiceAccount(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	sa := &corev1.ServiceAccount{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      OpenStackLightspeedAppServerServiceAccountName,
+			Namespace: h.GetBeforeObject().GetNamespace(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), sa, func() error {
+		// ServiceAccount has no spec to set, just ensure owner reference
+		return controllerutil.SetControllerReference(h.GetBeforeObject(), sa, h.GetScheme())
+	})
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrCreateAPIServiceAccount, err)
+	}
+
+	logger.Info("ServiceAccount reconciled", "name", sa.Name, "result", result)
+	return nil
+}
+
+// reconcileSARRole ensures the SAR cluster role exists and grants "create" on
+// subjectaccessreviews and tokenreviews.
+// It returns ErrCreateSARClusterRole (wrapping the cause) when the create/patch fails.
+func reconcileSARRole(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	role := &rbacv1.ClusterRole{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   OpenStackLightspeedAppServerSARRoleName,
+			Labels: generateAppServerSelectorLabels(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), role, func() error {
+		// Set the Rules spec
+		role.Rules = []rbacv1.PolicyRule{
+			{
+				APIGroups: []string{"authorization.k8s.io"},
+				Resources: []string{"subjectaccessreviews"},
+				Verbs:     []string{"create"},
+			},
+			{
+				APIGroups: []string{"authentication.k8s.io"},
+				Resources: []string{"tokenreviews"},
+				Verbs:     []string{"create"},
+			},
+		}
+		// Note: ClusterRole is cluster-scoped, no owner reference needed
+		return nil
+	})
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrCreateSARClusterRole, err)
+	}
+
+	logger.Info("SAR ClusterRole reconciled", "name", role.Name, "result", result)
+	return nil
+}
+
+// reconcileSARRoleBinding ensures the SAR cluster role binding exists.
+func reconcileSARRoleBinding(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	rb := &rbacv1.ClusterRoleBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   OpenStackLightspeedAppServerSARRoleBindingName,
+			Labels: generateAppServerSelectorLabels(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), rb, func() error {
+		// Bind the app server service account (in the instance namespace)
+		// to the SAR cluster role.
+		rb.Subjects = []rbacv1.Subject{
+			{
+				Kind:      "ServiceAccount",
+				Name:      OpenStackLightspeedAppServerServiceAccountName,
+				Namespace: h.GetBeforeObject().GetNamespace(),
+			},
+		}
+		rb.RoleRef = rbacv1.RoleRef{
+			APIGroup: "rbac.authorization.k8s.io",
+			Kind:     "ClusterRole",
+			Name:     OpenStackLightspeedAppServerSARRoleName,
+		}
+		// Note: ClusterRoleBinding is cluster-scoped, no owner reference needed
+		return nil
+	})
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrCreateSARClusterRoleBinding, err)
+	}
+
+	logger.Info("SAR ClusterRoleBinding reconciled", "name", rb.Name, "result", result)
+	return nil
+}
+
+// reconcileLlamaStackConfigMap ensures the Llama Stack config map exists and is up to date.
+func reconcileLlamaStackConfigMap(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	// Render the config before touching the cluster so a generation failure
+	// never results in a partially written configmap.
+	yamlData, err := buildLlamaStackYAML(h, ctx, instance)
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrGenerateLlamaStackConfigMap, err)
+	}
+
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      LlamaStackConfigCmName,
+			Namespace: h.GetBeforeObject().GetNamespace(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), cm, func() error {
+		// The configmap holds exactly one key: the rendered config file.
+		cm.Data = map[string]string{
+			LlamaStackConfigFilename: yamlData,
+		}
+		// Set owner reference
+		return controllerutil.SetControllerReference(h.GetBeforeObject(), cm, h.GetScheme())
+	})
+	if err != nil {
+		return fmt.Errorf("%w: %w", ErrCreateLlamaStackConfigMap, err)
+	}
+
+	logger.Info("Llama Stack ConfigMap reconciled", "name", cm.Name, "result", result)
+	return nil
+}
+
+// reconcileLcoreConfigMap ensures the LCore config map exists and is up to date.
+func reconcileLcoreConfigMap(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	// Render the config before touching the cluster so a generation failure
+	// never results in a partially written configmap.
+	yamlData, err := buildLCoreConfigYAML(h, instance)
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrGenerateAPIConfigmap, err)
+	}
+
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      LCoreConfigCmName,
+			Namespace: h.GetBeforeObject().GetNamespace(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), cm, func() error {
+		// The configmap holds exactly one key: the rendered config file.
+		cm.Data = map[string]string{
+			LCoreConfigFilename: yamlData,
+		}
+		// Set owner reference
+		return controllerutil.SetControllerReference(h.GetBeforeObject(), cm, h.GetScheme())
+	})
+	if err != nil {
+		return fmt.Errorf("%w: %w", ErrCreateAPIConfigmap, err)
+	}
+
+	logger.Info("LCore ConfigMap reconciled", "name", cm.Name, "result", result)
+	return nil
+}
+
+// reconcileOpenStackLightspeedAdditionalCAConfigMap verifies that the additional CA
+// config map named by instance.Spec.TLSCACertBundle exists in the instance namespace.
+// It is a no-op when no bundle is configured and never creates or modifies the config map.
+func reconcileOpenStackLightspeedAdditionalCAConfigMap(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	if instance.Spec.TLSCACertBundle == "" {
+		logger.Info("no additional CA configmap configured, skipping")
+		return nil
+	}
+
+	existing := &corev1.ConfigMap{}
+	err := h.GetClient().Get(ctx, client.ObjectKey{
+		Name:      instance.Spec.TLSCACertBundle,
+		Namespace: h.GetBeforeObject().GetNamespace(),
+	}, existing)
+	if err != nil {
+		// Keep the cause in the chain so callers can tell NotFound from other failures.
+		return fmt.Errorf("%w %q: %w", ErrGetAdditionalCACM, instance.Spec.TLSCACertBundle, err)
+	}
+
+	logger.Info("additional CA configmap found", "name", instance.Spec.TLSCACertBundle)
+	return nil
+}
+
+// reconcileProxyCAConfigMap is a no-op for the minimal mapping (no proxy config).
+func reconcileProxyCAConfigMap(h *common_helper.Helper, _ context.Context, _ *apiv1beta1.OpenStackLightspeed) error {
+	// Kept as a task so the reconcile task list stays stable; it only logs.
+	logger := h.GetLogger()
+	logger.Info("proxy CA configmap reconciliation skipped (no proxy config in minimal mapping)")
+	return nil
+}
+
+// reconcileNetworkPolicy ensures the app server network policy exists and is up to date.
+// The policy selects the app server pods and allows ingress only on the app
+// server container port over TCP.
+func reconcileNetworkPolicy(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	np := &networkingv1.NetworkPolicy{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      OpenStackLightspeedAppServerNetworkPolicyName,
+			Namespace: h.GetBeforeObject().GetNamespace(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), np, func() error {
+		// The spec is replaced wholesale on every reconcile.
+		np.Spec = networkingv1.NetworkPolicySpec{
+			PodSelector: metav1.LabelSelector{
+				MatchLabels: generateAppServerSelectorLabels(),
+			},
+			Ingress: []networkingv1.NetworkPolicyIngressRule{
+				{
+					Ports: []networkingv1.NetworkPolicyPort{
+						{
+							Protocol: toPtr(corev1.ProtocolTCP),
+							Port:     toPtr(intstr.FromInt32(OpenStackLightspeedAppServerContainerPort)),
+						},
+					},
+				},
+			},
+			PolicyTypes: []networkingv1.PolicyType{
+				networkingv1.PolicyTypeIngress,
+			},
+		}
+		// Set owner reference
+		return controllerutil.SetControllerReference(h.GetBeforeObject(), np, h.GetScheme())
+	})
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrCreateAppServerNetworkPolicy, err)
+	}
+
+	logger.Info("App server NetworkPolicy reconciled", "name", np.Name, "result", result)
+	return nil
+}
+
+// reconcileDeployment ensures the LCore deployment exists and is up to date.
+func reconcileDeployment(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	deployment := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      LCoreDeploymentName,
+			Namespace: h.GetBeforeObject().GetNamespace(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), deployment, func() error {
+		// Build the desired pod template spec
+		podTemplateSpec, err := buildLCorePodTemplateSpec(h, ctx, instance)
+		if err != nil {
+			return err
+		}
+
+		// Selective field updates (avoid update loops)
+		replicas := int32(1)
+		deployment.Spec.Replicas = &replicas
+		deployment.Spec.Selector = &metav1.LabelSelector{
+			MatchLabels: generateAppServerSelectorLabels(),
+		}
+		deployment.Spec.Template = podTemplateSpec
+
+		// Set owner reference
+		return controllerutil.SetControllerReference(h.GetBeforeObject(), deployment, h.GetScheme())
+	})
+	if err != nil {
+		// Wrap the cause as well as the sentinel: the failure may come from
+		// the pod template builder or from the API server, and callers can
+		// only tell them apart if the chain is preserved.
+		return fmt.Errorf("%w: %w", ErrCreateAPIDeployment, err)
+	}
+
+	logger.Info("LCore Deployment reconciled", "name", deployment.Name, "result", result)
+	return nil
+}
+
+// reconcileService ensures the OpenStack Lightspeed app server service exists and is up to date.
+// Always uses the service-ca annotation for TLS certificate provisioning.
+func reconcileService(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      OpenStackLightspeedAppServerServiceName,
+			Namespace: h.GetBeforeObject().GetNamespace(),
+		},
+	}
+
+	result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), svc, func() error {
+		// Selective field updates (preserves ClusterIP, ClusterIPs, etc.)
+		svc.Spec.Selector = generateAppServerSelectorLabels()
+		svc.Spec.Ports = []corev1.ServicePort{
+			{
+				Name:       "https",
+				Port:       OpenStackLightspeedAppServerServicePort,
+				Protocol:   corev1.ProtocolTCP,
+				TargetPort: intstr.FromInt32(OpenStackLightspeedAppServerContainerPort),
+			},
+		}
+		svc.Spec.Type = corev1.ServiceTypeClusterIP
+
+		// Set service-ca annotation for TLS certificate provisioning,
+		// keeping any annotations already present on the service.
+		if svc.Annotations == nil {
+			svc.Annotations = make(map[string]string)
+		}
+		svc.Annotations[ServingCertSecretAnnotationKey] = OpenStackLightspeedCertsSecretName
+
+		// Set owner reference
+		return controllerutil.SetControllerReference(h.GetBeforeObject(), svc, h.GetScheme())
+	})
+	if err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrCreateAPIService, err)
+	}
+
+	logger.Info("App server Service reconciled", "name", svc.Name, "result", result)
+	return nil
+}
+
+// reconcileTLSSecret waits for the TLS secret to be populated by the service-ca
+// operator with tls.key and tls.crt data. It polls every two seconds, starting
+// immediately, until both keys are present or ResourceCreationTimeout elapses.
+func reconcileTLSSecret(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+	logger.Info("waiting for TLS secret to be populated", "name", OpenStackLightspeedCertsSecretName)
+
+	secretKey := client.ObjectKey{
+		Name:      OpenStackLightspeedCertsSecretName,
+		Namespace: h.GetBeforeObject().GetNamespace(),
+	}
+
+	err := wait.PollUntilContextTimeout(ctx, 2*time.Second, ResourceCreationTimeout, true, func(ctx context.Context) (bool, error) {
+		secret := &corev1.Secret{}
+		if err := h.GetClient().Get(ctx, secretKey, secret); err != nil {
+			// A missing secret just means it has not been created yet: keep polling.
+			if errors.IsNotFound(err) {
+				return false, nil
+			}
+			return false, err
+		}
+		_, hasKey := secret.Data["tls.key"]
+		_, hasCert := secret.Data["tls.crt"]
+		return hasKey && hasCert, nil
+	})
+	if err != nil {
+		// Keep the cause in the chain so callers can distinguish a poll
+		// timeout or cancellation from an API failure.
+		return fmt.Errorf("%w: %w", ErrGetTLSSecret, err)
+	}
+
+	logger.Info("TLS secret is ready", "name", OpenStackLightspeedCertsSecretName)
+	return nil
+}
+
+// reconcileDeleteClusterRoleBindingByLabels deletes every ClusterRoleBinding that
+// carries the app server selector labels.
+func reconcileDeleteClusterRoleBindingByLabels(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	labelSelector := labels.Set(generateAppServerSelectorLabels()).AsSelector()
+	matchingLabels := client.MatchingLabelsSelector{Selector: labelSelector}
+	deleteOptions := &client.DeleteAllOfOptions{
+		ListOptions: client.ListOptions{
+			LabelSelector: matchingLabels,
+		},
+	}
+
+	if err := h.GetClient().DeleteAllOf(ctx, &rbacv1.ClusterRoleBinding{}, deleteOptions); err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrDeleteSARClusterRoleBinding, err)
+	}
+
+	logger.Info("SAR ClusterRoleBinding deleted successfully")
+	return nil
+}
+
+// reconcileDeleteClusterRoleByLabels deletes every ClusterRole that carries the
+// app server selector labels.
+func reconcileDeleteClusterRoleByLabels(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error {
+	logger := h.GetLogger()
+
+	labelSelector := labels.Set(generateAppServerSelectorLabels()).AsSelector()
+	matchingLabels := client.MatchingLabelsSelector{Selector: labelSelector}
+	deleteOptions := &client.DeleteAllOfOptions{
+		ListOptions: client.ListOptions{
+			LabelSelector: matchingLabels,
+		},
+	}
+
+	if err := h.GetClient().DeleteAllOf(ctx, &rbacv1.ClusterRole{}, deleteOptions); err != nil {
+		// Wrap the cause as well as the sentinel so callers can still match
+		// the underlying API error with errors.Is / errors.As.
+		return fmt.Errorf("%w: %w", ErrDeleteSARClusterRole, err)
+	}
+
+	logger.Info("SAR ClusterRole deleted successfully")
+	return nil
+}
diff --git a/internal/controller/llama_stack_config.go b/internal/controller/llama_stack_config.go
new file mode 100644
index 00000000..68e7df20
--- /dev/null
+++ b/internal/controller/llama_stack_config.go
@@ -0,0 +1,440 @@
+/*
+ Copyright 2026.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "strings" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" + "sigs.k8s.io/yaml" +) + +func buildLlamaStackCoreConfig(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + "version": "2", + + // image_name is a semantic identifier for the llama-stack configuration + // Note: Does NOT affect PostgreSQL database name (llama-stack uses hardcoded "llamastack") + "image_name": "openstack-lightspeed-configuration", + + // Minimal APIs for RAG + MCP: agents (for MCP), files, inference, safety (required by agents), + // telemetry, tool_runtime, vector_io. 
+ "apis": []string{ + "agents", + "files", + "inference", + "safety", + "tool_runtime", + "vector_io", + }, + "benchmarks": []interface{}{}, + "container_image": nil, + "datasets": []interface{}{}, + "external_providers_dir": nil, + "inference_store": map[string]interface{}{ + "db_path": ".llama/distributions/ollama/inference_store.db", + "type": "sqlite", + }, + "logging": nil, + "metadata_store": map[string]interface{}{ + "db_path": "/tmp/llama-stack/registry.db", + "namespace": nil, + "type": "sqlite", + }, + } +} + +func buildLlamaStackFileProviders(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) []interface{} { + return []interface{}{ + map[string]interface{}{ + "provider_id": "localfs", + "provider_type": "inline::localfs", + "config": map[string]interface{}{ + "storage_dir": "/tmp/llama-stack-files", + "metadata_store": map[string]interface{}{ + "backend": "sql_default", + "namespace": "files_metadata", + "table_name": "files_metadata", + }, + }, + }, + } +} + +func buildLlamaStackAgentProviders(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) []interface{} { + return []interface{}{ + map[string]interface{}{ + "provider_id": "meta-reference", + "provider_type": "inline::meta-reference", + "config": map[string]interface{}{ + "persistence": map[string]interface{}{ + "agent_state": map[string]interface{}{ + "backend": "kv_default", + "table_name": "agent_state", + "namespace": "agent_state", + }, + "responses": map[string]interface{}{ + "backend": "sql_default", + "table_name": "agent_responses", + "namespace": "agent_responses", + }, + }, + }, + }, + } +} + +func buildLlamaStackInferenceProviders(_ *common_helper.Helper, _ context.Context, instance *apiv1beta1.OpenStackLightspeed) ([]interface{}, error) { + // Always include sentence-transformers for embeddings + providers := []interface{}{ + map[string]interface{}{ + "provider_id": "sentence-transformers", + "provider_type": "inline::sentence-transformers", + "config": 
map[string]interface{}{}, + }, + } + + // Add the LLM provider from the instance spec + { + provider := buildProvider(instance) + providerConfig := map[string]interface{}{ + "provider_id": provider.Name, + } + + // Convert provider name to valid environment variable name + envVarName := providerNameToEnvVarName(provider.Name) + + // Map provider types to Llama Stack provider types + switch provider.Type { + case "openai", "rhoai_vllm", "rhelai_vllm": + config := map[string]interface{}{} + // Determine the appropriate Llama Stack provider type: + // - OpenAI uses remote::openai + // - vLLM uses remote::vllm + var apiKeyField string + if provider.Type == "openai" { + providerConfig["provider_type"] = "remote::openai" + apiKeyField = "api_key" + } else { + providerConfig["provider_type"] = "remote::vllm" + apiKeyField = "api_token" + } + // Llama Stack will substitute ${env.VAR_NAME} with the actual env var value + config[apiKeyField] = fmt.Sprintf("${env.%s%s}", envVarName, EnvVarSuffixAPIKey) + + // Add custom URL if specified + if provider.URL != "" { + config["base_url"] = provider.URL + } + + providerConfig["config"] = config + + case "azure_openai": + providerConfig["provider_type"] = "remote::azure" + config := map[string]interface{}{} + + // Azure supports both API key and client credentials authentication + // Always include api_key (required by LiteLLM's Pydantic validation) + config["api_key"] = fmt.Sprintf("${env.%s_API_KEY}", envVarName) + + // Also include client credentials fields (will be empty if not using client credentials) + config["client_id"] = fmt.Sprintf("${env.%s_CLIENT_ID:=}", envVarName) + config["tenant_id"] = fmt.Sprintf("${env.%s_TENANT_ID:=}", envVarName) + config["client_secret"] = fmt.Sprintf("${env.%s_CLIENT_SECRET:=}", envVarName) + + // Azure-specific fields + if provider.AzureDeploymentName != "" { + config["deployment_name"] = provider.AzureDeploymentName + } + if provider.APIVersion != "" { + config["api_version"] = 
provider.APIVersion + } + if provider.URL != "" { + config["api_base"] = provider.URL + } + providerConfig["config"] = config + + case "watsonx", "bam": + // These providers are not supported by Llama Stack + // They are handled directly by lightspeed-stack (LCS), not Llama Stack + return nil, fmt.Errorf("provider type '%s' (provider '%s') is not currently supported by Llama Stack. Supported types: openai, azure_openai, rhoai_vllm, rhelai_vllm", provider.Type, provider.Name) + + default: + // Unknown provider type + return nil, fmt.Errorf("unknown provider type '%s' (provider '%s'). Supported types: openai, azure_openai, rhoai_vllm, rhelai_vllm", provider.Type, provider.Name) + } + + providers = append(providers, providerConfig) + } + + return providers, nil +} + +// Safety API - Required by agents provider (for MCP) +func buildLlamaStackSafety(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) []interface{} { + return []interface{}{ + map[string]interface{}{ + "provider_id": "llama-guard", + "provider_type": "inline::llama-guard", + "config": map[string]interface{}{ + "excluded_categories": []interface{}{}, + }, + }, + } +} + +func buildLlamaStackToolRuntime(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) []interface{} { + return []interface{}{ + map[string]interface{}{ + "provider_id": "model-context-protocol", + "provider_type": "remote::model-context-protocol", + "config": map[string]interface{}{}, + }, + map[string]interface{}{ + "provider_id": "rag-runtime", + "provider_type": "inline::rag-runtime", + "config": map[string]interface{}{}, + }, + } +} + +func buildLlamaStackVectorDB(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) []interface{} { + return []interface{}{ + map[string]interface{}{ + "provider_id": "faiss", + "provider_type": "inline::faiss", + "config": map[string]interface{}{ + "kvstore": map[string]interface{}{ + "backend": "sql_default", + "table_name": "vector_store", + }, + "persistence": 
map[string]interface{}{ + "backend": "kv_default", + "namespace": "vector_persistence", + }, + }, + }, + } +} + +func buildLlamaStackServerConfig(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + return map[string]interface{}{ + "auth": nil, + "host": "0.0.0.0", // Listen on all interfaces so lightspeed-stack container can connect + "port": LlamaStackContainerPort, + "quota": nil, + "tls_cafile": nil, + "tls_certfile": nil, + "tls_keyfile": nil, + } +} + +// buildLlamaStackStorage configures persistent storage for Llama Stack +func buildLlamaStackStorage(_ *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed) map[string]interface{} { + // Define storage backends - SQL only + backends := map[string]interface{}{ + "sql_default": map[string]interface{}{ + "type": "sql_sqlite", + "db_path": "/tmp/llama-stack/sql_store.db", + }, + "kv_default": map[string]interface{}{ + "type": "kv_sqlite", + "db_path": "/tmp/llama-stack/kv_store.db", + }, + "postgres_backend": map[string]interface{}{ + "type": "sql_postgres", + "host": fmt.Sprintf("lightspeed-postgres-server.%s.svc", instance.GetNamespace()), + "port": PostgresServicePort, + "user": "postgres", + "password": "${env.POSTGRES_PASSWORD}", + // Note: Database name is HARDCODED to "llamastack" in llama-stack's postgres adapter + // Not configurable - llama-stack ignores image_name for database selection + "ssl_mode": "require", + "ca_cert_path": "/etc/certs/postgres-ca/service-ca.crt", + "gss_encmode": "disable", + }, + } + + // Map data stores to backends - all use SQL with table_name + stores := map[string]interface{}{ + "metadata": map[string]interface{}{ + "namespace": "registry", + "backend": "kv_default", + }, + "inference": map[string]interface{}{ + "table_name": "inference_store", + "backend": "sql_default", + }, + "conversations": map[string]interface{}{ + "table_name": "openai_conversations", // Required by config schema but ignored - llama-stack uses hardcoded names 
+ "backend": "postgres_backend", + }, + } + + return map[string]interface{}{ + "backends": backends, + "stores": stores, + } +} + +func buildLlamaStackVectorDBs(_ *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed) []interface{} { + vectorDBs := []interface{}{} + + // Use RAG configuration from instance if available + rags := buildLCoreRAGConfigs(instance, instance.Status.ActiveOCPRAGVersion) + if len(rags) > 0 { + for _, rag := range rags { + vectorDB := map[string]interface{}{ + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "provider_id": "faiss", + } + + // Use IndexID if specified, otherwise generate a default + if rag.IndexID != "" { + vectorDB["vector_db_id"] = rag.IndexID + } else { + // Generate a simple ID from the image name + vectorDB["vector_db_id"] = "rag_" + sanitizeID(rag.Image) + } + + vectorDBs = append(vectorDBs, vectorDB) + } + } else { + // Default fallback if no RAG configured + vectorDBs = append(vectorDBs, map[string]interface{}{ + "vector_db_id": "my_knowledge_base", + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "embedding_dimension": 768, + "provider_id": "faiss", + }) + } + + return vectorDBs +} + +func buildLlamaStackModels(_ *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed) []interface{} { + models := []interface{}{ + // Always include sentence-transformers embedding model for RAG + map[string]interface{}{ + "model_id": "sentence-transformers/all-mpnet-base-v2", + "model_type": "embedding", + "provider_id": "sentence-transformers", + "provider_model_id": "sentence-transformers/all-mpnet-base-v2", + "metadata": map[string]interface{}{ + "embedding_dimension": 768, + }, + }, + } + + // Add LLM models from the instance spec + { + provider := buildProvider(instance) + for _, model := range provider.Models { + modelConfig := map[string]interface{}{ + "model_id": model.Name, + "model_type": "llm", + "provider_id": provider.Name, + 
"provider_model_id": model.Name, + } + + // Add model-specific metadata if available + metadata := map[string]interface{}{} + if model.MaxTokensForResponse > 0 { + metadata["max_tokens"] = model.MaxTokensForResponse + } + if len(metadata) > 0 { + modelConfig["metadata"] = metadata + } + + models = append(models, modelConfig) + } + } + + return models +} + +func buildLlamaStackToolGroups(_ *common_helper.Helper, _ *apiv1beta1.OpenStackLightspeed) []interface{} { + return []interface{}{ + map[string]interface{}{ + "toolgroup_id": "builtin::rag", + "provider_id": "rag-runtime", + }, + } +} + +// buildLlamaStackYAML assembles the complete Llama Stack configuration and converts to YAML +func buildLlamaStackYAML(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) (string, error) { + // Build the complete config as a map + config := buildLlamaStackCoreConfig(h, instance) + + // Build inference providers with error handling + inferenceProviders, err := buildLlamaStackInferenceProviders(h, ctx, instance) + if err != nil { + return "", fmt.Errorf("failed to build inference providers: %w", err) + } + + // Build providers map - only include providers for enabled APIs + config["providers"] = map[string]interface{}{ + "files": buildLlamaStackFileProviders(h, instance), + "agents": buildLlamaStackAgentProviders(h, instance), + "inference": inferenceProviders, + "safety": buildLlamaStackSafety(h, instance), + "tool_runtime": buildLlamaStackToolRuntime(h, instance), + "vector_io": buildLlamaStackVectorDB(h, instance), + } + + // Add top-level fields + config["scoring_fns"] = []interface{}{} + config["server"] = buildLlamaStackServerConfig(h, instance) + config["storage"] = buildLlamaStackStorage(h, instance) + config["vector_dbs"] = buildLlamaStackVectorDBs(h, instance) + config["models"] = buildLlamaStackModels(h, instance) + config["tool_groups"] = buildLlamaStackToolGroups(h, instance) + config["telemetry"] = map[string]interface{}{ + 
"enabled": false, + } + + // Convert to YAML + yamlBytes, err := yaml.Marshal(config) + if err != nil { + return "", fmt.Errorf("failed to marshal Llama Stack config to YAML: %w", err) + } + + return string(yamlBytes), nil +} + +// sanitizeID creates a valid ID from an image name. It extracts just the image name without +// registry/tag (e.g., "quay.io/my-org/my-rag:latest" -> "my-rag") +func sanitizeID(image string) string { + parts := strings.Split(image, "/") + name := parts[len(parts)-1] + name = strings.Split(name, ":")[0] + + // Replace invalid characters with underscores + name = strings.Map(func(r rune) rune { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '_' || r == '-' { + return r + } + return '_' + }, name) + + return name +} diff --git a/internal/controller/ocp_version.go b/internal/controller/ocp_version.go index bafd6504..14f65bc0 100644 --- a/internal/controller/ocp_version.go +++ b/internal/controller/ocp_version.go @@ -23,7 +23,9 @@ import ( "slices" "strings" + "github.com/openstack-k8s-operators/lib-common/modules/common/condition" common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "sigs.k8s.io/controller-runtime/pkg/client" @@ -45,10 +47,98 @@ const ( // SupportedOCPVersions lists the OCP versions available in the RAG database var SupportedOCPVersions = []string{OCPVersion416, OCPVersion418, OCPVersionLatest} +// resolveOCPVersion detects and resolves the OCP version to use for RAG configuration. +// Returns the active OCP version to use (or empty string if OCP RAG is disabled). 
+func (r *OpenStackLightspeedReconciler) resolveOCPVersion( + ctx context.Context, + helper *common_helper.Helper, + instance *apiv1beta1.OpenStackLightspeed, +) string { + Log := helper.GetLogger() + + // If OCP RAG is disabled, mark condition as True with "disabled" message + if !instance.Spec.EnableOCPRAG { + instance.Status.Conditions.MarkTrue( + apiv1beta1.OCPRAGCondition, + apiv1beta1.OCPRAGDisabledMessage, + ) + instance.Status.ActiveOCPRAGVersion = "" + return "" + } + + // Step 1: Detect cluster version + detectedVersion, err := DetectOCPVersion(ctx, helper) + + if err != nil { + Log.Info("Failed to detect OCP version, disabling OCP RAG", "error", err) + cond := condition.FalseCondition( + apiv1beta1.OCPRAGCondition, + condition.ErrorReason, + condition.SeverityError, + apiv1beta1.OCPRAGDetectionFailedMessage, + ) + cond.Message = fmt.Sprintf("%s: %s", apiv1beta1.OCPRAGDetectionFailedMessage, err.Error()) + instance.Status.Conditions.Set(cond) + instance.Status.ActiveOCPRAGVersion = "" + return "" + } + + Log.Info("Detected OCP cluster version", "version", detectedVersion) + + // Step 2: Resolve which version to use (with override and fallback) + activeVersion, isFallback, err := ResolveOCPVersion( + detectedVersion, + instance.Spec.OCPRAGVersionOverride, + instance.Spec.EnableOCPRAG, + ) + + if err != nil { + // Invalid override + Log.Error(err, "Invalid OCP version configuration") + cond := condition.FalseCondition( + apiv1beta1.OCPRAGCondition, + condition.ErrorReason, + condition.SeverityError, + apiv1beta1.OCPRAGOverrideInvalidMessage, + ) + cond.Message = fmt.Sprintf("%s: %s", apiv1beta1.OCPRAGOverrideInvalidMessage, err.Error()) + instance.Status.Conditions.Set(cond) + instance.Status.ActiveOCPRAGVersion = "" + return "" + } + + // Step 3: Update status and conditions based on resolution + instance.Status.ActiveOCPRAGVersion = activeVersion + + if isFallback { + Log.Info("Using 'latest' OCP documentation as fallback", + "detectedVersion", 
detectedVersion, + "supportedVersions", SupportedOCPVersions) + + cond := condition.TrueCondition( + apiv1beta1.OCPRAGCondition, + "Fallback", + ) + cond.Message = fmt.Sprintf(apiv1beta1.OCPRAGVersionFallbackMessage, + detectedVersion, SupportedOCPVersions) + instance.Status.Conditions.Set(cond) + } else { + Log.Info("Using OCP RAG documentation", "version", activeVersion) + cond := condition.TrueCondition( + apiv1beta1.OCPRAGCondition, + "Resolved", + ) + cond.Message = fmt.Sprintf(apiv1beta1.OCPRAGVersionResolvedMessage, activeVersion) + instance.Status.Conditions.Set(cond) + } + + return activeVersion +} + // DetectOCPVersion detects the OpenShift cluster version func DetectOCPVersion(ctx context.Context, helper *common_helper.Helper) (string, error) { // Use raw client to access cluster-scoped resources - rawClient, err := GetRawClient(helper) + rawClient, err := getRawClient(helper) if err != nil { return "", fmt.Errorf("failed to get raw client: %w", err) } @@ -152,3 +242,27 @@ func ResolveOCPVersion(detectedVersion, overrideVersion string, enableOCPRAG boo // Fallback to latest for unsupported versions return OCPVersionLatest, true, nil } + +// BuildRAGConfigs builds the RAG configuration array. +// OpenStack RAG is always included first. +// OCP RAG is added if ocpVersion is provided. 
+func BuildRAGConfigs(instance *apiv1beta1.OpenStackLightspeed, ocpVersion string) []interface{} { + rags := []interface{}{ + // OpenStack RAG + map[string]interface{}{ + "image": instance.Spec.RAGImage, + "indexPath": OpenStackLightspeedVectorDBPath, + }, + } + + // Add OCP RAG if enabled + if ocpVersion != "" { + rags = append(rags, map[string]interface{}{ + "image": instance.Spec.RAGImage, + "indexPath": GetOCPVectorDBPath(ocpVersion), + "indexID": GetOCPIndexName(ocpVersion), + }) + } + + return rags +} diff --git a/internal/controller/ols_install.go b/internal/controller/ols_install.go deleted file mode 100644 index 9749c817..00000000 --- a/internal/controller/ols_install.go +++ /dev/null @@ -1,427 +0,0 @@ -/* -Copyright 2025. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// This file contains the logic for managing and ensuring the installation of -// the OpenShift Lightspeed (OLS) Operator in a cluster. 
-package controller - -import ( - "context" - "errors" - "fmt" - "os" - "strings" - - apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" - "sigs.k8s.io/controller-runtime/pkg/client" - - common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" - k8s_errors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/ptr" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" -) - -const ( - // OLSOperatorName - Name of the OpenShift Lightspeed operator. - OLSOperatorName = "lightspeed-operator" -) - -// EnsureOLSOperatorInstalled ensures that a compatible OLS Operator is present in the cluster. -// If the operator already exists, this checks that it matches the required version (otherwise it fails). -// If it is missing, this attempts to install the correct version. -func EnsureOLSOperatorInstalled( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - isUserInstalledOLSOperator, err := IsUserInstalledOLSOperatorMode(ctx, helper, instance) - if err != nil { - return false, err - } - - if isUserInstalledOLSOperator { - return false, errors.New( - "detected an existing OpenShift Lightspeed operator installation. " + - "Please uninstall OpenShift Lightspeed operator and allow the " + - "OpenStack Lightspeed operator to manage its installation automatically") - } - - OLSOperatorInstalled, err := InstallInstanceOwnedOLSOperator(ctx, helper, instance) - if err != nil { - return false, err - } - - return OLSOperatorInstalled, nil -} - -// InstallInstanceOwnedOLSOperator - ensures that the OpenShift Lightspeed Operator (OLS Operator) -// is installed and owned by the specified OpenStackLightspeed instance. This function: -// 1. Determines the recommended OLS Operator version. -// 2. 
Creates or updates a Subscription, setting the instance as its owner. -// 3. Approves the related InstallPlan manually. -// 4. Sets ownership of the generated ClusterServiceVersion (CSV) to the instance. -// 5. Returns true if the OLS Operator is installed and owned by the instance, or an error otherwise. -func InstallInstanceOwnedOLSOperator( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - subscription := &operatorsv1alpha1.Subscription{ - ObjectMeta: metav1.ObjectMeta{ - Name: GetOLSSubscriptionName(instance), - Namespace: instance.Namespace, - }, - } - - instanceOwnerReference := []metav1.OwnerReference{ - { - APIVersion: instance.APIVersion, - Kind: instance.Kind, - Name: instance.GetName(), - UID: instance.GetUID(), - Controller: ptr.To(true), - BlockOwnerDeletion: ptr.To(true), - }, - } - opResult, err := controllerutil.CreateOrUpdate(ctx, helper.GetClient(), subscription, func() error { - subscription.Spec = &operatorsv1alpha1.SubscriptionSpec{ - Channel: "stable", - InstallPlanApproval: operatorsv1alpha1.ApprovalManual, - CatalogSource: instance.Spec.CatalogSourceName, - CatalogSourceNamespace: instance.Spec.CatalogSourceNamespace, - Package: OLSOperatorName, - } - - err := SetStartingCSV(subscription) - if err != nil { - return err - } - - subscription.SetOwnerReferences(instanceOwnerReference) - - return nil - }) - if err != nil { - return false, err - } - - // If the Subscription was just created, or if it doesn't yet contain an InstallPlanRef, - // return (false, nil) -> wait. Attempting to approve the InstallPlan before it is properly - // linked can cause OLM to create unnecessary additional InstallPlans. - if opResult != controllerutil.OperationResultNone || subscription.Status.InstallPlanRef == nil { - return false, nil - } - - // Because we've set the subscription to require manual approval, we need to explicitly - // approve the InstallPlan at this point. 
Manual approval is used to prevent OLM from - // automatically upgrading the operator to a newer version than we've tested. This way, - // we ensure that only the specific OLS Operator version we've tested is installed. - installPlanApproved, err := ApproveOLSOperatorInstallPlan(ctx, helper, instance) - if err != nil { - return false, err - } else if !installPlanApproved { - return false, nil - } - - // Ensure the CSV is owned by this instance. This helps determine during - // deletion if the OLS Operator was installed by us or pre-existed before - // the instance. - OLSOperatorCSV, err := GetOLSOperatorCSV(ctx, helper) - if err != nil { - return false, err - } else if OLSOperatorCSV == nil { - return false, nil - } - - OLSOperatorCSV.SetOwnerReferences(instanceOwnerReference) - err = helper.GetClient().Update(ctx, OLSOperatorCSV) - if err != nil && k8s_errors.IsConflict(err) { - return false, nil - } else if err != nil { - return false, err - } - - return InstanceOwnedOLSOperatorComplete(ctx, helper, instance) -} - -// InstanceOwnedOLSOperatorComplete checks if the OLS Operator's CSV is owned -// by the given OpenStackLightspeed instance and is in the Succeeded phase. -func InstanceOwnedOLSOperatorComplete( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - OLSOperatorCSV, err := GetOLSOperatorCSV(ctx, helper) - if err != nil { - return false, err - } else if OLSOperatorCSV == nil { - return false, nil - } - - // When the OLS Operator CSV is owned by us and it is in the Succeeded phase - // we can be certain that the deployment of OLS Operator is over. - return IsOwnedBy(OLSOperatorCSV, instance) && OLSOperatorCSV.Status.Phase == operatorsv1alpha1.CSVPhaseSucceeded, nil -} - -// GetRecommendedOLSVersion returns the recommended version of the OpenShift -// Lightspeed (OLS) operator to deploy. This version is obtained from the environment -// variable "OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION". 
If the variable is unset or empty, -// an error is returned. If the value is "latest", an empty string and no error are returned. -// This indicates the rest of the OLS installation code can install the latest version -// of OLS operator since no specific version is set. -func GetRecommendedOLSVersion() (string, error) { - version := os.Getenv("OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION") - switch version { - case "": - return "", errors.New("environment variable OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION is not set") - case "latest": - return "", nil - default: - return version, nil - } -} - -// GetOLSOperatorCSV - retrieves the ClusterServiceVersion (CSV) for the OpenShift Lightspeed operator -// from all namespaces in the OpenShift cluster. It returns the first CSV it finds whose name begins -// with the OLSOperatorName. If no such CSV is found, it returns (nil, nil). If there is an error -// while listing the CSV resources, that error is returned. -func GetOLSOperatorCSV( - ctx context.Context, - helper *common_helper.Helper, -) (*operatorsv1alpha1.ClusterServiceVersion, error) { - // Use a dedicated client here because the default controller-runtime client may be restricted - // to WATCH_NAMESPACE. This ensures we can retrieve CSVs from all namespaces cluster-wide. - rawClient, err := GetRawClient(helper) - if err != nil { - return nil, err - } - - var CSVs operatorsv1alpha1.ClusterServiceVersionList - err = rawClient.List(ctx, &CSVs, client.InNamespace("")) - if err != nil && k8s_errors.IsNotFound(err) { - return nil, nil - } else if err != nil { - return nil, err - } - - for _, CSV := range CSVs.Items { - if strings.HasPrefix(CSV.GetName(), OLSOperatorName) { - return &CSV, nil - } - } - - return nil, nil -} - -// IsUserInstalledOLSOperatorMode checks if an OpenShift Lightspeed Operator -// (OLS Operator) is installed in the cluster (by the user), but was NOT installed/owned by -// this specific OpenStackLightspeed instance. 
Returns true only if there is an OLS OperatorIsOwnedBy -// ClusterServiceVersion (CSV) found, and that CSV is NOT owned by the given instance. -func IsUserInstalledOLSOperatorMode( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - OLSOperatorCSV, err := GetOLSOperatorCSV(ctx, helper) - if err != nil { - return false, err - } else if OLSOperatorCSV == nil { - // Note: If no CSV is found we can be 100 % certain we are in the InstanceOwned - // installation mode. - return false, nil - } - - subscription := &operatorsv1alpha1.Subscription{} - err = helper.GetClient().Get(ctx, client.ObjectKey{ - Name: GetOLSSubscriptionName(instance), - Namespace: instance.Namespace, - }, subscription) - if err != nil && !k8s_errors.IsNotFound(err) { - return false, err - } - - userInstalledMode := !IsOwnedBy(OLSOperatorCSV, instance) && !IsOwnedBy(subscription, instance) - return userInstalledMode, nil -} - -// UninstallInstanceOwnedOLSOperator ensures that the OLS Operator installed by -// a specific OpenStackLightspeed instance is uninstalled from the cluster. The function -// checks if the ClusterServiceVersion (CSV) for the OLS Operator exists and whether it -// is owned by the given OpenStackLightspeed instance. If so, it deletes the CSV. -// The function then checks whether the CSV has been successfully removed. It returns -// true if the operator CSV is no longer found (i.e., uninstalled), or an error if an -// unexpected problem occurs. 
-func UninstallInstanceOwnedOLSOperator( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - OLSOperatorCSV, err := GetOLSOperatorCSV(ctx, helper) - if err != nil { - return false, err - } else if OLSOperatorCSV == nil { - return true, nil - } - - if !IsOwnedBy(OLSOperatorCSV, instance) { - return true, nil - } - - // When the operator is installed via OLM, the OpenStack Lightspeed Subscription - // is also set as an owner of its InstallPlan, resulting in the InstallPlan having - // both the OLS Subscription and the OpenStackLightspeed resources as owners. - // When uninstalling the OLS operator, only the OLS Subscription owner reference is removed, - // which causes the InstallPlans to remain and accumulate over time. To avoid this, - // we explicitly attempt to delete the relevant InstallPlan to prevent leftovers. - _, err = DeleteOLSOperatorInstallPlan(ctx, helper, instance) - if err != nil { - return false, err - } - - if err := helper.GetClient().Delete(ctx, OLSOperatorCSV); err != nil { - return false, err - } - - OLSOperatorCSV, err = GetOLSOperatorCSV(ctx, helper) - if err != nil { - return false, err - } else if OLSOperatorCSV != nil { - return false, nil - } - - OLSInstallPlan, err := GetOLSOperatorInstallPlan(ctx, helper, instance) - if err != nil { - return false, err - } else if OLSInstallPlan != nil { - return false, nil - } - - return true, nil -} - -// GetOLSOperatorInstallPlan returns the InstallPlan that was used to install -// the OpenShift Lightspeed Operator (OLS Operator). It searches for an InstallPlan -// whose ClusterServiceVersion name matches the OLS Operator prefix and the -// recommended OLS version. If such an InstallPlan exists, it is returned; otherwise, -// the function returns nil. 
-func GetOLSOperatorInstallPlan( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (*operatorsv1alpha1.InstallPlan, error) { - var installPlans operatorsv1alpha1.InstallPlanList - err := helper.GetClient().List(ctx, &installPlans, client.InNamespace(instance.Namespace)) - if err != nil { - return nil, err - } - - recommendedOLSVersion, err := GetRecommendedOLSVersion() - if err != nil { - return nil, err - } - - for _, installPlan := range installPlans.Items { - var isOLSOperatorCSV bool - for _, csvName := range installPlan.Spec.ClusterServiceVersionNames { - if strings.HasPrefix(csvName, OLSOperatorName) && strings.HasSuffix(csvName, recommendedOLSVersion) { - isOLSOperatorCSV = true - break - } - } - - if isOLSOperatorCSV { - return &installPlan, nil - } - } - - return nil, nil -} - -// ApproveOLSOperatorInstallPlan approves the InstallPlan that is responsible for installing -// the OpenShift Lightspeed Operator (OLS Operator) in the given OpenStackLightspeed instance's -// namespace. It sets the Approved field to true and updates the InstallPlan resource in the cluster. -// Returns true if the approval succeeds, false and an error otherwise. -func ApproveOLSOperatorInstallPlan( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - installPlan, err := GetOLSOperatorInstallPlan(ctx, helper, instance) - if err != nil { - return false, err - } else if installPlan == nil { - return false, nil - } - - installPlan.Spec.Approved = true - err = helper.GetClient().Update(ctx, installPlan) - if err != nil { - return false, err - } - - return true, nil -} - -// DeleteOLSOperatorInstallPlan deletes the InstallPlan associated with installing the -// OpenShift Lightspeed Operator (OLS Operator) in the specified OpenStackLightspeed instance's -// namespace. If the InstallPlan does not exist, the function returns true. 
It returns true -// if the deletion succeeds or the InstallPlan was not found, and false with an error otherwise. -func DeleteOLSOperatorInstallPlan( - ctx context.Context, - helper *common_helper.Helper, - instance *apiv1beta1.OpenStackLightspeed, -) (bool, error) { - installPlan, err := GetOLSOperatorInstallPlan(ctx, helper, instance) - if err != nil { - return false, err - } else if installPlan == nil { - return true, nil - } - - err = helper.GetClient().Delete(ctx, installPlan) - if err != nil && k8s_errors.IsNotFound(err) { - return true, nil - } else if err != nil { - return false, err - } - - return true, nil -} - -// GetOLSSubscriptionName generates a unique subscription name for the OpenStack Lightspeed Operator -// by appending the first 5 characters of the instance's UID. This reduces the likelihood of -// naming collisions with existing subscriptions that may have been created manually by the user. -func GetOLSSubscriptionName(instance *apiv1beta1.OpenStackLightspeed) string { - return fmt.Sprintf("%s-%s", OLSOperatorName, string(instance.GetUID())[:5]) -} - -// SetStartingCSV sets the StartingCSV field of the given Subscription based on -// the recommended OLS operator version. If the recommended version is "", -// StartingCSV is not set to allow OLM to select the latest compatible version. 
-func SetStartingCSV(subscription *operatorsv1alpha1.Subscription) error { - recommendedVersion, err := GetRecommendedOLSVersion() - if err != nil { - return err - } - - if recommendedVersion != "" { - subscription.Spec.StartingCSV = fmt.Sprintf("%s.v%s", OLSOperatorName, recommendedVersion) - } - - return nil -} diff --git a/internal/controller/openstacklightspeed_controller.go b/internal/controller/openstacklightspeed_controller.go index 67c32dd2..c109d562 100644 --- a/internal/controller/openstacklightspeed_controller.go +++ b/internal/controller/openstacklightspeed_controller.go @@ -19,12 +19,14 @@ package controller import ( "context" "fmt" - "time" "github.com/go-logr/logr" "github.com/openstack-k8s-operators/lib-common/modules/common/condition" common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" operatorsv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -56,24 +58,18 @@ func (r *OpenStackLightspeedReconciler) GetLogger(ctx context.Context) logr.Logg // +kubebuilder:rbac:groups=lightspeed.openstack.org,resources=openstacklightspeeds,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=lightspeed.openstack.org,resources=openstacklightspeeds/status,verbs=get;update;patch // +kubebuilder:rbac:groups=lightspeed.openstack.org,resources=openstacklightspeeds/finalizers,verbs=update -// +kubebuilder:rbac:groups=ols.openshift.io,resources=olsconfigs,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=ols.openshift.io,resources=olsconfigs/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=ols.openshift.io,resources=olsconfigs/finalizers,verbs=update +// 
+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;watch;create;patch;update;delete;deletecollection +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch;create;patch;update;delete;deletecollection // +kubebuilder:rbac:groups=operators.coreos.com,resources=clusterserviceversions,verbs=get;list;watch -// +kubebuilder:rbac:groups=operators.coreos.com,resources=clusterserviceversions,namespace=openshift-lightspeed,verbs=update;patch;delete -// +kubebuilder:rbac:groups=operators.coreos.com,resources=subscriptions,namespace=openshift-lightspeed,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=operators.coreos.com,resources=installplans,namespace=openshift-lightspeed,verbs=get;list;watch;update;delete +// +kubebuilder:rbac:groups=operators.coreos.com,resources=clusterserviceversions,namespace=openstack-lightspeed,verbs=update;patch;delete // +kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions,verbs=get;list;watch +// +kubebuilder:rbac:groups=networking.k8s.io,resources=networkpolicies,namespace=openstack-lightspeed,verbs=get;list;watch;create;patch;update +// +kubebuilder:rbac:groups=apps,resources=deployments,namespace=openstack-lightspeed,verbs=get;list;watch;create;update;patch +// +kubebuilder:rbac:groups="",resources=configmaps,namespace=openstack-lightspeed,verbs=get;list;watch;create;patch;update;delete +// +kubebuilder:rbac:groups="",resources=secrets,namespace=openstack-lightspeed,verbs=get;list;watch;create;patch;update;delete;deletecollection +// +kubebuilder:rbac:groups="",resources=services,namespace=openstack-lightspeed,verbs=get;list;watch;create;patch;update +// +kubebuilder:rbac:groups="",resources=serviceaccounts,namespace=openstack-lightspeed,verbs=get;list;watch;create;patch -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired 
state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the OpenStackLightspeed object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.4/pkg/reconcile func (r *OpenStackLightspeedReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { Log := r.GetLogger(ctx) Log.Info("OpenStackLightspeed Reconciling") @@ -143,11 +139,15 @@ func (r *OpenStackLightspeedReconciler) Reconcile(ctx context.Context, req ctrl. instance.Status.Conditions.Init(&cl) instance.Status.ObservedGeneration = instance.Generation + // TODO(lpiwowar): Use the resolve OCP version when we add the RAG deployment // OCP Version Detection and Resolution - must be done early so status field is always set - r.resolveOCPVersion(ctx, helper, instance) + _ = r.resolveOCPVersion(ctx, helper, instance) if !instance.DeletionTimestamp.IsZero() { - return r.reconcileDelete(ctx, helper, instance) + if err := r.reconcileDelete(ctx, helper, instance); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil } if instance.DeletionTimestamp.IsZero() && controllerutil.AddFinalizer(instance, helper.GetFinalizer()) { @@ -162,218 +162,95 @@ func (r *OpenStackLightspeedReconciler) Reconcile(ctx context.Context, req ctrl. instance.Spec.MaxTokensForResponse = apiv1beta1.OpenStackLightspeedDefaultValues.MaxTokensForResponse } - // Ensure a compatible version of the OpenShift Lightspeed Operator is running in the cluster. - // This checks if the correct OLS Operator version is present and installs it if necessary. 
- isOLSOperatorInstalled, err := EnsureOLSOperatorInstalled(ctx, helper, instance) - if err != nil { - instance.Status.Conditions.Set(condition.FalseCondition( - apiv1beta1.OpenShiftLightspeedOperatorReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, - condition.DeploymentReadyErrorMessage, - err.Error(), - )) - - return ctrl.Result{}, nil - } else if !isOLSOperatorInstalled { - instance.Status.Conditions.Set(condition.FalseCondition( - apiv1beta1.OpenShiftLightspeedOperatorReadyCondition, - condition.RequestedReason, - condition.SeverityInfo, - apiv1beta1.OpenShiftLightspeedOperatorWaiting, - )) - - // In this branch we know that the - return ctrl.Result{Requeue: true, RequeueAfter: 10 * time.Second}, nil - } - - // Mark the OpenShift Lightspeed Operator as ready in the status conditions. - instance.Status.Conditions.MarkTrue( - apiv1beta1.OpenShiftLightspeedOperatorReadyCondition, - apiv1beta1.OpenShiftLightspeedOperatorReady, - ) - - // NOTE: We cannot consume the OLSConfig definition directly from the OLS operator's code due to - // a conflict in Go versions. When this comment was written, the min. required Go version for - // openstack-operator was 1.21 whereas OLS operator required at least Go version 1.23. Once the - // Go versions catch up with each other we should consider consuming OLSConfig directly from OLS - // operator and updating this code and any subsequent code that consumes this structure. 
- olsConfig := uns.Unstructured{} - olsConfigGVK := schema.GroupVersionKind{ - Group: "ols.openshift.io", - Version: "v1alpha1", - Kind: "OLSConfig", + reconcileTasks := []ReconcileTask{ + {Name: "PostgresResources", Task: ReconcilePostgresResources}, + {Name: "PostgresDeployment", Task: ReconcilePostgresDeployment}, + {Name: "LCoreResources", Task: ReconcileLCoreResources}, + {Name: "LCoreDeployment", Task: ReconcileLCoreDeployment}, } - olsConfig.SetGroupVersionKind(olsConfigGVK) - olsConfig.SetName(OLSConfigName) - - _, err = controllerutil.CreateOrPatch(ctx, r.Client, &olsConfig, func() error { - // Check if the OpenStackLightspeed instance that is being processed owns the OLSConfig. If - // it is owned by other OpenStackLightspeed instance stop the reconciliation. - olsConfigLabels := olsConfig.GetLabels() - ownerLabel := "" - if val, ok := olsConfigLabels[OpenStackLightspeedOwnerIDLabel]; ok { - ownerLabel = val - } - - if ownerLabel != "" && ownerLabel != string(instance.GetObjectMeta().GetUID()) { - return fmt.Errorf("OLSConfig is managed by different OpenStackLightspeed instance") - } - - err = PatchOLSConfig(helper, instance, &olsConfig) - if err != nil { - return err - } - - return nil - }) - if err != nil { + if err := ReconcileTasks(helper, ctx, instance, reconcileTasks); err != nil { instance.Status.Conditions.Set(condition.FalseCondition( apiv1beta1.OpenStackLightspeedReadyCondition, condition.ErrorReason, condition.SeverityWarning, - condition.DeploymentReadyErrorMessage, - err.Error())) - return ctrl.Result{}, err - } - - OLSConfigReady, err := IsOLSConfigReady(ctx, helper) - if err != nil { + apiv1beta1.DeploymentCheckFailedMessage, + err.Error(), + )) return ctrl.Result{}, err } - if OLSConfigReady { - instance.Status.Conditions.MarkTrue( - apiv1beta1.OpenStackLightspeedReadyCondition, - apiv1beta1.OpenStackLightspeedReadyMessage, - ) - Log.Info("OLSConfig is ready!") - } else { - Log.Info("OLSConfig is not ready yet. 
Waiting...") - return ctrl.Result{RequeueAfter: time.Second * time.Duration(5)}, nil - } - - Log.Info("OpenStackLightspeed Reconciled successfully") - return ctrl.Result{}, nil + return r.reconcileStatus(ctx, helper, instance) } -// resolveOCPVersion detects and resolves the OCP version to use for RAG configuration. -// Returns the active OCP version to use (or empty string if OCP RAG is disabled). -func (r *OpenStackLightspeedReconciler) resolveOCPVersion( +// reconcileDelete reconciles the deletion of OpenStackLightspeed instance +func (r *OpenStackLightspeedReconciler) reconcileDelete( ctx context.Context, helper *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed, -) string { - Log := helper.GetLogger() - - // If OCP RAG is disabled, mark condition as True with "disabled" message - if !instance.Spec.EnableOCPRAG { - instance.Status.Conditions.MarkTrue( - apiv1beta1.OCPRAGCondition, - apiv1beta1.OCPRAGDisabledMessage, - ) - instance.Status.ActiveOCPRAGVersion = "" - return "" - } - - // Step 1: Detect cluster version - detectedVersion, err := DetectOCPVersion(ctx, helper) +) error { + Log := r.GetLogger(ctx) + Log.Info("OpenStackLightspeed Reconciling Delete") - if err != nil { - Log.Info("Failed to detect OCP version, disabling OCP RAG", "error", err) - cond := condition.FalseCondition( - apiv1beta1.OCPRAGCondition, - condition.ErrorReason, - condition.SeverityError, - apiv1beta1.OCPRAGDetectionFailedMessage, - ) - cond.Message = fmt.Sprintf("%s: %s", apiv1beta1.OCPRAGDetectionFailedMessage, err.Error()) - instance.Status.Conditions.Set(cond) - instance.Status.ActiveOCPRAGVersion = "" - return "" + // Delete cluster-scoped resources using fail-fast pattern + deletionTasks := []ReconcileTask{ + {Name: "DeleteSARClusterRoleBinding", Task: reconcileDeleteClusterRoleBindingByLabels}, + {Name: "DeleteSARClusterRole", Task: reconcileDeleteClusterRoleByLabels}, } - Log.Info("Detected OCP cluster version", "version", detectedVersion) - - // Step 2: Resolve 
which version to use (with override and fallback) - activeVersion, isFallback, err := ResolveOCPVersion( - detectedVersion, - instance.Spec.OCPRAGVersionOverride, - instance.Spec.EnableOCPRAG, - ) - - if err != nil { - // Invalid override - Log.Error(err, "Invalid OCP version configuration") - cond := condition.FalseCondition( - apiv1beta1.OCPRAGCondition, - condition.ErrorReason, - condition.SeverityError, - apiv1beta1.OCPRAGOverrideInvalidMessage, - ) - cond.Message = fmt.Sprintf("%s: %s", apiv1beta1.OCPRAGOverrideInvalidMessage, err.Error()) - instance.Status.Conditions.Set(cond) - instance.Status.ActiveOCPRAGVersion = "" - return "" + // Execute deletion tasks in order (fail-fast: stop on first error) + if err := ReconcileTasksFailFast(helper, ctx, instance, deletionTasks); err != nil { + Log.Error(err, "failed to delete cluster-scoped resources") + return err } - // Step 3: Update status and conditions based on resolution - instance.Status.ActiveOCPRAGVersion = activeVersion - - if isFallback { - Log.Info("Using 'latest' OCP documentation as fallback", - "detectedVersion", detectedVersion, - "supportedVersions", SupportedOCPVersions) - - cond := condition.TrueCondition( - apiv1beta1.OCPRAGCondition, - "Fallback", - ) - cond.Message = fmt.Sprintf(apiv1beta1.OCPRAGVersionFallbackMessage, - detectedVersion, SupportedOCPVersions) - instance.Status.Conditions.Set(cond) - } else { - Log.Info("Using OCP RAG documentation", "version", activeVersion) - cond := condition.TrueCondition( - apiv1beta1.OCPRAGCondition, - "Resolved", - ) - cond.Message = fmt.Sprintf(apiv1beta1.OCPRAGVersionResolvedMessage, activeVersion) - instance.Status.Conditions.Set(cond) - } + controllerutil.RemoveFinalizer(instance, helper.GetFinalizer()) - return activeVersion + Log.Info("OpenStackLightspeed Reconciling Delete completed") + return nil } -// reconcileDelete reconciles the deletion of OpenStackLightspeed instance -func (r *OpenStackLightspeedReconciler) reconcileDelete( +func (r 
*OpenStackLightspeedReconciler) reconcileStatus( ctx context.Context, helper *common_helper.Helper, instance *apiv1beta1.OpenStackLightspeed, ) (ctrl.Result, error) { - Log := r.GetLogger(ctx) - Log.Info("OpenStackLightspeed Reconciling Delete") - - isRemoved, err := RemoveOLSConfig(ctx, helper, instance) - if err != nil { - return ctrl.Result{}, err - } else if !isRemoved { - Log.Info("OLSConfig removal in progress ...") - return ctrl.Result{RequeueAfter: time.Second * 10}, nil + deployments := []string{ + PostgresDeploymentName, + LCoreDeploymentName, } + for _, deploymentName := range deployments { + deployment, err := getDeployment(ctx, helper, deploymentName, instance.Namespace) + if err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + apiv1beta1.OpenStackLightspeedReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + apiv1beta1.DeploymentCheckFailedMessage, + err.Error(), + )) + return ctrl.Result{}, err + } - isUninstalled, err := UninstallInstanceOwnedOLSOperator(ctx, helper, instance) - if err != nil { - return ctrl.Result{}, err - } else if !isUninstalled { - Log.Info("OLS Operator uninstallation in progress ...") - return ctrl.Result{RequeueAfter: time.Second * 10}, nil + if !isDeploymentReady(deployment) { + instance.Status.Conditions.Set(condition.FalseCondition( + apiv1beta1.OpenStackLightspeedReadyCondition, + condition.RequestedReason, + condition.SeverityInfo, + apiv1beta1.DeploymentsNotReadyMessage, + deploymentName, + )) + return ctrl.Result{RequeueAfter: ResourceCreationTimeout}, nil + } } - controllerutil.RemoveFinalizer(instance, helper.GetFinalizer()) + instance.Status.Conditions.MarkTrue( + apiv1beta1.OpenStackLightspeedReadyCondition, + apiv1beta1.OpenStackLightspeedReadyMessage, + ) + + helper.GetLogger().Info("OpenStackLightspeed Reconciled successfully") - Log.Info("OpenStackLightspeed Reconciling Delete completed") return ctrl.Result{}, nil } @@ -391,12 +268,13 @@ func (r 
*OpenStackLightspeedReconciler) SetupWithManager(mgr ctrl.Manager) error return ctrl.NewControllerManagedBy(mgr). For(&apiv1beta1.OpenStackLightspeed{}). Owns(&operatorsv1alpha1.ClusterServiceVersion{}). - Owns(&operatorsv1alpha1.Subscription{}). - Watches( - &operatorsv1alpha1.InstallPlan{}, - handler.EnqueueRequestsFromMapFunc(r.NotifyAllOpenStackLightspeeds), - builder.WithPredicates(predicate.ResourceVersionChangedPredicate{}), - ). + Owns(&appsv1.Deployment{}). + Owns(&corev1.ServiceAccount{}). + Owns(&rbacv1.ClusterRole{}). + Owns(&rbacv1.ClusterRoleBinding{}). + Owns(&corev1.Service{}). + Owns(&corev1.ConfigMap{}). + Owns(&corev1.Secret{}). Watches( clusterVersion, handler.EnqueueRequestsFromMapFunc(r.NotifyAllOpenStackLightspeeds), diff --git a/internal/controller/postgres_deployment.go b/internal/controller/postgres_deployment.go new file mode 100644 index 00000000..f89f71e0 --- /dev/null +++ b/internal/controller/postgres_deployment.go @@ -0,0 +1,202 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "path" + "strconv" + + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// buildPostgresPodTemplateSpec builds the pod template spec for the Postgres deployment. +// It takes no arguments and returns the template with an empty annotations map for the caller to fill in.
+func buildPostgresPodTemplateSpec() corev1.PodTemplateSpec { + // Build volumes and volume mounts + volumes := []corev1.Volume{} + volumeMounts := []corev1.VolumeMount{} + + restrictedMode := VolumeRestrictedMode + defaultMode := VolumeDefaultMode + + // TLS certs volume (auto-provisioned by service-ca via the Service annotation) + volumes = append(volumes, corev1.Volume{ + Name: "secret-" + PostgresCertsSecretName, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: PostgresCertsSecretName, + DefaultMode: &restrictedMode, + }, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: "secret-" + PostgresCertsSecretName, + MountPath: OpenStackLightspeedAppCertsMountRoot, + ReadOnly: true, + }) + + // Bootstrap script volume + volumes = append(volumes, corev1.Volume{ + Name: "secret-" + PostgresBootstrapSecretName, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: PostgresBootstrapSecretName, + DefaultMode: &restrictedMode, + }, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: "secret-" + PostgresBootstrapSecretName, + MountPath: PostgresBootstrapVolumeMountPath, + SubPath: PostgresExtensionScript, + ReadOnly: true, + }) + + // Postgres config volume + volumes = append(volumes, corev1.Volume{ + Name: PostgresConfigMapName, + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{Name: PostgresConfigMapName}, + DefaultMode: &defaultMode, + }, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: PostgresConfigMapName, + MountPath: PostgresConfigVolumeMountPath, + SubPath: PostgresConfigKey, + }) + + // TODO: CRITICAL - Replace EmptyDir with a PVC. With EmptyDir all conversation + // history is lost if the pod is rescheduled or the OCP control plane goes down. 
+ volumes = append(volumes, corev1.Volume{ + Name: PostgresDataVolume, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: PostgresDataVolume, + MountPath: PostgresDataVolumeMountPath, + }) + + // Postgres CA volume + volumes = append(volumes, getPostgresCAConfigVolume()) + volumeMounts = append(volumeMounts, getPostgresCAVolumeMountWithPath(path.Join(OpenStackLightspeedAppCertsMountRoot, PostgresCAVolume))) + + // Var run volume (writable runtime directory) + volumes = append(volumes, corev1.Volume{ + Name: PostgresVarRunVolumeName, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: PostgresVarRunVolumeName, + MountPath: PostgresVarRunVolumeMountPath, + }) + + // Tmp volume (writable temp directory) + volumes = append(volumes, corev1.Volume{ + Name: TmpVolumeName, + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: TmpVolumeName, + MountPath: TmpVolumeMountPath, + }) + + return corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: generatePostgresSelectorLabels(), + Annotations: make(map[string]string), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: PostgresDeploymentName, + Image: apiv1beta1.OpenStackLightspeedDefaultValues.PostgresImageURL, + ImagePullPolicy: corev1.PullAlways, + Ports: []corev1.ContainerPort{ + { + Name: "server", + ContainerPort: PostgresServicePort, + Protocol: corev1.ProtocolTCP, + }, + }, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &[]bool{false}[0], + ReadOnlyRootFilesystem: &[]bool{true}[0], + }, + VolumeMounts: volumeMounts, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("30m"), + 
corev1.ResourceMemory: resource.MustParse("300Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + Env: []corev1.EnvVar{ + { + Name: "POSTGRESQL_USER", + Value: PostgresDefaultUser, + }, + { + Name: "POSTGRESQL_DATABASE", + Value: PostgresDefaultDbName, + }, + { + Name: "POSTGRESQL_SHARED_BUFFERS", + Value: PostgresSharedBuffers, + }, + { + Name: "POSTGRESQL_MAX_CONNECTIONS", + Value: strconv.Itoa(PostgresMaxConnections), + }, + { + Name: "POSTGRESQL_ADMIN_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: PostgresSecretName}, + Key: OpenStackLightspeedComponentPasswordFileName, + }, + }, + }, + { + Name: "POSTGRESQL_PASSWORD", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: PostgresSecretName}, + Key: OpenStackLightspeedComponentPasswordFileName, + }, + }, + }, + }, + }, + }, + Volumes: volumes, + }, + } +} diff --git a/internal/controller/postgres_reconciler.go b/internal/controller/postgres_reconciler.go new file mode 100644 index 00000000..70d4549a --- /dev/null +++ b/internal/controller/postgres_reconciler.go @@ -0,0 +1,312 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "crypto/rand" + "encoding/base64" + "fmt" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +// ReconcilePostgresResources reconciles Postgres prerequisite resources (Phase 1): +// ConfigMap, Bootstrap Secret, Password Secret, and Network Policy. +// Uses continue-on-error pattern to attempt all resources even if some fail. +func ReconcilePostgresResources(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error { + tasks := []ReconcileTask{ + {Name: "PostgresConfigMap", Task: reconcilePostgresConfigMap}, + {Name: "PostgresBootstrapSecret", Task: reconcilePostgresBootstrapSecret}, + {Name: "PostgresSecret", Task: reconcilePostgresSecret}, + {Name: "PostgresNetworkPolicy", Task: reconcilePostgresNetworkPolicy}, + } + + return ReconcileTasks(h, ctx, instance, tasks) +} + +// ReconcilePostgresDeployment reconciles the Postgres Deployment and Service (Phase 2). +// Uses fail-fast pattern where the first error stops execution. 
+func ReconcilePostgresDeployment(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed) error { + tasks := []ReconcileTask{ + {Name: "PostgresDeployment", Task: reconcilePostgresDeploymentTask}, + {Name: "PostgresService", Task: reconcilePostgresServiceTask}, + } + + return ReconcileTasksFailFast(h, ctx, instance, tasks) +} + +func reconcilePostgresConfigMap(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: PostgresConfigMapName, + Namespace: h.GetBeforeObject().GetNamespace(), + }, + } + + result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), cm, func() error { + // Set static postgres configuration + cm.Data = map[string]string{ + PostgresConfigKey: PostgresConfigMapContent, + } + // Set owner reference + return controllerutil.SetControllerReference(h.GetBeforeObject(), cm, h.GetScheme()) + }) + + if err != nil { + return fmt.Errorf("%w: %v", ErrCreatePostgresConfigMap, err) + } + + h.GetLogger().Info("Postgres ConfigMap reconciled", "name", cm.Name, "result", result) + return nil +} + +func reconcilePostgresBootstrapSecret(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: PostgresBootstrapSecretName, + Namespace: h.GetBeforeObject().GetNamespace(), + }, + } + + result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), secret, func() error { + // Set bootstrap script data + secret.StringData = map[string]string{ + PostgresExtensionScript: PostgresBootStrapScriptContent, + } + // Set owner reference + return controllerutil.SetControllerReference(h.GetBeforeObject(), secret, h.GetScheme()) + }) + + if err != nil { + return fmt.Errorf("%w: %v", ErrCreatePostgresBootstrapSecret, err) + } + + h.GetLogger().Info("Postgres bootstrap secret reconciled", "name", secret.Name, "result", result) + return nil +} + 
+func reconcilePostgresSecret(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error { + // Check if secret exists - if not, cleanup old secrets first + checkSecret := &corev1.Secret{} + secretKey := client.ObjectKey{ + Name: PostgresSecretName, + Namespace: h.GetBeforeObject().GetNamespace(), + } + err := h.GetClient().Get(ctx, secretKey, checkSecret) + if errors.IsNotFound(err) { + // Delete any old postgres secrets before creating a new one + if err := deleteOldPostgresSecrets(h, ctx); err != nil { + return err + } + } else if err != nil { + return fmt.Errorf("%w: %v", ErrGetPostgresSecret, err) + } + + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: PostgresSecretName, + Namespace: h.GetBeforeObject().GetNamespace(), + }, + } + + result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), secret, func() error { + // Only set password if not already present (preserve existing password) + if len(secret.Data) == 0 || secret.Data[OpenStackLightspeedComponentPasswordFileName] == nil { + // Generate random password only on first creation + randomPassword := make([]byte, 12) + if _, err := rand.Read(randomPassword); err != nil { + return fmt.Errorf("%w: %v", ErrGeneratePostgresSecret, err) + } + encodedPassword := base64.StdEncoding.EncodeToString(randomPassword) + secret.Data = map[string][]byte{ + OpenStackLightspeedComponentPasswordFileName: []byte(encodedPassword), + } + } + // Set owner reference + return controllerutil.SetControllerReference(h.GetBeforeObject(), secret, h.GetScheme()) + }) + + if err != nil { + return fmt.Errorf("%w: %v", ErrCreatePostgresSecret, err) + } + + h.GetLogger().Info("Postgres secret reconciled", "name", secret.Name, "result", result) + return nil +} + +func deleteOldPostgresSecrets(h *common_helper.Helper, ctx context.Context) error { + labelSelector := labels.Set{"app.kubernetes.io/name": "lightspeed-service-postgres"}.AsSelector() + matchingLabels := 
client.MatchingLabelsSelector{Selector: labelSelector} + deleteOptions := &client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{ + Namespace: h.GetBeforeObject().GetNamespace(), + LabelSelector: matchingLabels, + }, + } + if err := h.GetClient().DeleteAllOf(ctx, &corev1.Secret{}, deleteOptions); err != nil { + return fmt.Errorf("failed to delete old Postgres secrets: %w", err) + } + return nil +} + +func reconcilePostgresNetworkPolicy(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error { + np := &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: PostgresNetworkPolicyName, + Namespace: h.GetBeforeObject().GetNamespace(), + }, + } + + result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), np, func() error { + // Set Spec (wholesale replacement, same as before) + // Restricts ingress to Postgres to only allow traffic from app server pods + np.Spec = networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: generatePostgresSelectorLabels(), + }, + Ingress: []networkingv1.NetworkPolicyIngressRule{ + { + From: []networkingv1.NetworkPolicyPeer{ + { + PodSelector: &metav1.LabelSelector{ + MatchLabels: generateAppServerSelectorLabels(), + }, + }, + }, + Ports: []networkingv1.NetworkPolicyPort{ + { + Protocol: toPtr(corev1.ProtocolTCP), + Port: toPtr(intstr.FromInt32(PostgresServicePort)), + }, + }, + }, + }, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + }, + } + // Set owner reference + return controllerutil.SetControllerReference(h.GetBeforeObject(), np, h.GetScheme()) + }) + + if err != nil { + return fmt.Errorf("%w: %v", ErrCreatePostgresNetworkPolicy, err) + } + + h.GetLogger().Info("Postgres NetworkPolicy reconciled", "name", np.Name, "result", result) + return nil +} + +func reconcilePostgresDeploymentTask(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error { + deployment := &appsv1.Deployment{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: PostgresDeploymentName, + Namespace: h.GetBeforeObject().GetNamespace(), + }, + } + + result, err := controllerutil.CreateOrPatch(ctx, h.GetClient(), deployment, func() error { + currentConfigMapVersion, err := getConfigMapResourceVersion(ctx, h, PostgresConfigMapName, h.GetBeforeObject().GetNamespace()) + if err != nil && !errors.IsNotFound(err) { + return fmt.Errorf("%w: %v", ErrGetPostgresConfigMap, err) + } + + // Build the desired deployment pod spec + podTemplateSpec := buildPostgresPodTemplateSpec() + + // Initialize annotations map if needed + if podTemplateSpec.Annotations == nil { + podTemplateSpec.Annotations = map[string]string{} + } + + // Store the current ConfigMap version in pod template annotations. + // When this changes, Kubernetes will see a pod template change and trigger a rollout. + podTemplateSpec.Annotations[PostgresConfigMapResourceVersionAnnotation] = currentConfigMapVersion + + // Selective field updates (avoid update loops) + replicas := int32(1) + deployment.Spec.Replicas = &replicas + deployment.Spec.Selector = &metav1.LabelSelector{ + MatchLabels: generatePostgresSelectorLabels(), + } + deployment.Spec.Template = podTemplateSpec + + // Also set RevisionHistoryLimit to match current behavior + revisionHistoryLimit := int32(1) + deployment.Spec.RevisionHistoryLimit = &revisionHistoryLimit + + // Set owner reference + return controllerutil.SetControllerReference(h.GetBeforeObject(), deployment, h.GetScheme()) + }) + + if err != nil { + return fmt.Errorf("%w: %v", ErrCreatePostgresDeployment, err) + } + + h.GetLogger().Info("Postgres Deployment reconciled", "name", deployment.Name, "result", result) + return nil +} + +func reconcilePostgresServiceTask(h *common_helper.Helper, ctx context.Context, _ *apiv1beta1.OpenStackLightspeed) error { + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: PostgresServiceName, + Namespace: h.GetBeforeObject().GetNamespace(), + }, + } + + result, err 
:= controllerutil.CreateOrPatch(ctx, h.GetClient(), svc, func() error { + // Selective field updates (preserves ClusterIP, ClusterIPs, etc.) + svc.Spec.Selector = generatePostgresSelectorLabels() + svc.Spec.Ports = []corev1.ServicePort{ + { + Port: PostgresServicePort, + Protocol: corev1.ProtocolTCP, + Name: "server", + TargetPort: intstr.Parse("server"), + }, + } + svc.Spec.Type = corev1.ServiceTypeClusterIP + + // Set service-ca annotation for TLS certificate provisioning + if svc.Annotations == nil { + svc.Annotations = make(map[string]string) + } + svc.Annotations[ServingCertSecretAnnotationKey] = PostgresCertsSecretName + + // Set owner reference + return controllerutil.SetControllerReference(h.GetBeforeObject(), svc, h.GetScheme()) + }) + + if err != nil { + return fmt.Errorf("%w: %v", ErrCreatePostgresService, err) + } + + h.GetLogger().Info("Postgres Service reconciled", "name", svc.Name, "result", result) + return nil +} diff --git a/internal/controller/reconcile_tasks.go b/internal/controller/reconcile_tasks.go new file mode 100644 index 00000000..e345c5b1 --- /dev/null +++ b/internal/controller/reconcile_tasks.go @@ -0,0 +1,72 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "fmt" + + common_helper "github.com/openstack-k8s-operators/lib-common/modules/common/helper" + apiv1beta1 "github.com/openstack-lightspeed/operator/api/v1beta1" +) + +// ReconcileFunc is a function that reconciles a single lcore resource. +type ReconcileFunc func(*common_helper.Helper, context.Context, *apiv1beta1.OpenStackLightspeed) error + +// ReconcileTask pairs a task name with its reconcile function. +type ReconcileTask struct { + Name string + Task ReconcileFunc +} + +// ReconcileTasks executes a list of reconciliation tasks sequentially, logging +// each failure but continuing through the remaining tasks. It returns the first +// error encountered, wrapped with the failing task's name. +func ReconcileTasks(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed, tasks []ReconcileTask) error { + logger := h.GetLogger() + logger.Info("reconciling resources") + + var firstErr error + for _, t := range tasks { + if err := t.Task(h, ctx, instance); err != nil { + logger.Error(err, "failed to reconcile resource", "task", t.Name) + if firstErr == nil { + firstErr = fmt.Errorf("task %s: %w", t.Name, err) + } + } + } + + if firstErr != nil { + return firstErr + } + + logger.Info("resources reconciled") + return nil +} + +// ReconcileTasksFailFast executes a list of reconciliation tasks sequentially, +// stopping immediately at the first error encountered. This is useful for tasks +// that have strict ordering dependencies where subsequent tasks cannot proceed +// if earlier ones fail. 
+func ReconcileTasksFailFast(h *common_helper.Helper, ctx context.Context, instance *apiv1beta1.OpenStackLightspeed, tasks []ReconcileTask) error { + for _, t := range tasks { + if err := t.Task(h, ctx, instance); err != nil { + return fmt.Errorf("task %s: %w", t.Name, err) + } + } + return nil +} diff --git a/kuttl-test.yaml b/kuttl-test.yaml index 8fbed551..a2f543cc 100644 --- a/kuttl-test.yaml +++ b/kuttl-test.yaml @@ -3,7 +3,7 @@ kind: TestSuite reportFormat: xml reportName: kuttl-report-openstack-lightspeed reportGranularity: test -namespace: openshift-lightspeed +namespace: openstack-lightspeed timeout: 600 parallel: 1 suppress: diff --git a/scripts/confirm-rhosls-running.sh b/scripts/confirm-rhosls-running.sh index 97091776..e6386133 100644 --- a/scripts/confirm-rhosls-running.sh +++ b/scripts/confirm-rhosls-running.sh @@ -1,7 +1,7 @@ #!/bin/bash while true; do - csv=$(oc get subscription openstack-lightspeed-operator -n openshift-lightspeed -o jsonpath='{.status.installedCSV}' 2>/dev/null) + csv=$(oc get subscription openstack-lightspeed-operator -n openstack-lightspeed -o jsonpath='{.status.installedCSV}' 2>/dev/null) if [ -n "$csv" ]; then echo "Found installedCSV: $csv" break @@ -11,4 +11,4 @@ while true; do done # Wait for the CSV to succeed -oc wait csv $csv --for=jsonpath='{.status.phase}'=Succeeded --timeout=300s -n openshift-lightspeed +oc wait csv "$csv" --for=jsonpath='{.status.phase}'=Succeeded --timeout=300s -n openstack-lightspeed diff --git a/scripts/env.sh b/scripts/env.sh index 3b361302..20c79ce1 100644 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -1,3 +1,2 @@ #!/bin/bash -export OPENSHIFT_LIGHTSPEED_OPERATOR_VERSION="latest" -export WATCH_NAMESPACE="openshift-lightspeed" +export WATCH_NAMESPACE="openstack-lightspeed" diff --git a/scripts/gen-ols.sh b/scripts/gen-ols.sh deleted file mode 100644 index b4e9be64..00000000 --- a/scripts/gen-ols.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Deploy OLS -# Optionally pass the CSV version to use 
as an argument to use other than the -# latest stable version. -set -ex - -if [ -z "${OUTPUT_DIR}" ]; then - echo "Please set OPERATOR_DIR" - exit 1 -fi - -if [ -n "$1" ]; then - CSV_VERSION="$1" -else - CSV_VERSION=$(oc get packagemanifest lightspeed-operator -o go-template="{{range .status.channels}}{{if eq .name \"stable\"}}{{.currentCSV}}{{\"\n\"}}{{end}}{{end}}") -fi - -DEST_DIR="${OUTPUT_DIR}/ols" -mkdir -p "${DEST_DIR}" - -cat > "${DEST_DIR}/namespace.yaml" < "${DEST_DIR}/operator_group.yaml" < "${DEST_DIR}/subscription.yaml" < "${DEST_DIR}/namespace.yaml" < "${DEST_DIR}/operator_group.yaml" <