diff --git a/install/0000_50_cluster-update-console-plugin_10_namespace.yaml b/install/0000_50_cluster-update-console-plugin_10_namespace.yaml
new file mode 100644
index 000000000..2e8383be0
--- /dev/null
+++ b/install/0000_50_cluster-update-console-plugin_10_namespace.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openshift-cluster-update-console-plugin
+  annotations:
+    kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates.
+    capability.openshift.io/name: Console
+    release.openshift.io/feature-set: TechPreviewNoUpgrade
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+  labels:
+    openshift.io/cluster-monitoring: "true"
+    pod-security.kubernetes.io/audit: restricted
+    pod-security.kubernetes.io/enforce: restricted
+    pod-security.kubernetes.io/warn: restricted
diff --git a/install/0000_50_cluster-update-console-plugin_20_networkpolicy.yaml b/install/0000_50_cluster-update-console-plugin_20_networkpolicy.yaml
new file mode 100644
index 000000000..78bc82235
--- /dev/null
+++ b/install/0000_50_cluster-update-console-plugin_20_networkpolicy.yaml
@@ -0,0 +1,16 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: default-deny
+  namespace: openshift-cluster-update-console-plugin
+  annotations:
+    kubernetes.io/description: This NetworkPolicy is used to deny all ingress and egress traffic by default in this namespace, matching all Pods, and serving as a baseline.
+    capability.openshift.io/name: Console
+    release.openshift.io/feature-set: TechPreviewNoUpgrade
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+spec:
+  podSelector: {}
+  policyTypes:
+  - Ingress
+  - Egress
diff --git a/install/0000_50_cluster-update-console-plugin_50_deployment.yaml b/install/0000_50_cluster-update-console-plugin_50_deployment.yaml
new file mode 100644
index 000000000..f8b19d125
--- /dev/null
+++ b/install/0000_50_cluster-update-console-plugin_50_deployment.yaml
@@ -0,0 +1,68 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: cluster-update-console-plugin
+  namespace: openshift-cluster-update-console-plugin
+  annotations:
+    kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates.
+    capability.openshift.io/name: Console
+    release.openshift.io/feature-set: TechPreviewNoUpgrade
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+spec:
+  selector:
+    matchLabels:
+      app: cluster-update-console-plugin
+  strategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 1
+    type: RollingUpdate
+  template:
+    metadata:
+      annotations:
+        target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}'
+        openshift.io/required-scc: restricted-v3
+      labels:
+        app: cluster-update-console-plugin
+    spec:
+      automountServiceAccountToken: false
+      containers:
+      - name: plugin
+        image: placeholder.url.oc.will.replace.this.example.org:cluster-update-console-plugin
+        imagePullPolicy: IfNotPresent
+        ports:
+        - name: https
+          containerPort: 9001
+        resources:
+          requests:
+            cpu: 20m
+            memory: 50Mi
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+        terminationMessagePolicy: FallbackToLogsOnError
+        volumeMounts:
+        - mountPath: /var/cert
+          name: cluster-update-console-plugin-cert
+          readOnly: true
+      dnsPolicy: Default
+      nodeSelector:
+        node-role.kubernetes.io/infra: ""
+      priorityClassName: system-cluster-critical
+      securityContext:
+        runAsNonRoot: true
+        seccompProfile:
+          type: RuntimeDefault
+      terminationGracePeriodSeconds: 30
+      tolerations:
+      - effect: NoSchedule
+        key: node-role.kubernetes.io/infra
+        operator: Exists
+      volumes:
+      - name: cluster-update-console-plugin-cert
+        secret:
+          defaultMode: 420
+          secretName: cluster-update-console-plugin-cert
diff --git a/install/0000_50_cluster-update-console-plugin_60_service.yaml b/install/0000_50_cluster-update-console-plugin_60_service.yaml
new file mode 100644
index 000000000..7d9728d91
--- /dev/null
+++ b/install/0000_50_cluster-update-console-plugin_60_service.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: openshift-cluster-update-console-plugin
+  namespace: openshift-cluster-update-console-plugin
+  annotations:
+    kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates.
+    capability.openshift.io/name: Console
+    release.openshift.io/feature-set: TechPreviewNoUpgrade
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+    # Requests a serving certificate in the Secret that the Deployment mounts at /var/cert.
+    service.beta.openshift.io/serving-cert-secret-name: cluster-update-console-plugin-cert
+spec:
+  type: ClusterIP
+  selector:
+    app: cluster-update-console-plugin
+  ports:
+  - name: https
+    port: 9001
+    targetPort: https
diff --git a/install/0000_50_cluster-update-console-plugin_80_servicemonitor.yaml b/install/0000_50_cluster-update-console-plugin_80_servicemonitor.yaml
new file mode 100644
index 000000000..cac1ebaf8
--- /dev/null
+++ b/install/0000_50_cluster-update-console-plugin_80_servicemonitor.yaml
@@ -0,0 +1,146 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    k8s-app: cluster-version-operator
+  name: cluster-version-operator
+  namespace: openshift-cluster-version
+  annotations:
+    kubernetes.io/description: Configure Prometheus to monitor cluster-version operator metrics.
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+spec:
+  endpoints:
+  - interval: 30s
+    port: metrics
+    scheme: https
+    tlsConfig:
+      serverName: cluster-version-operator.openshift-cluster-version.svc
+  scrapeClass: tls-client-certificate-auth
+  namespaceSelector:
+    matchNames:
+    - openshift-cluster-version
+  selector:
+    matchLabels:
+      k8s-app: cluster-version-operator
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  labels:
+    k8s-app: cluster-version-operator
+  name: cluster-version-operator
+  namespace: openshift-cluster-version
+  annotations:
+    kubernetes.io/description: Alerting rules for when cluster-version operator metrics call for administrator attention.
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+spec:
+  groups:
+  - name: cluster-version
+    rules:
+    - alert: ClusterVersionOperatorDown
+      annotations:
+        summary: Cluster version operator has disappeared from Prometheus target discovery.
+        description: The operator may be down or disabled. The cluster will not be kept up to date and upgrades will not be possible. Inspect the openshift-cluster-version namespace for events or changes to the cluster-version-operator deployment or pods to diagnose and repair. {{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} For more information refer to {{ label \"url\" (first $console_url ) }}/k8s/cluster/projects/openshift-cluster-version.{{ end }}{{ end }}" }}
+        runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-version-operator/ClusterVersionOperatorDown.md
+      expr: |
+        absent(up{job="cluster-version-operator"} == 1)
+      for: 10m
+      labels:
+        namespace: openshift-cluster-version
+        severity: critical
+    - alert: CannotRetrieveUpdates
+      annotations:
+        summary: Cluster version operator has not retrieved updates in {{ "{{ $value | humanizeDuration }}" }}.
+        description: Failure to retrieve updates means that cluster administrators will need to monitor for available updates on their own or risk falling behind on security or other bugfixes. If the failure is expected, you can clear spec.channel in the ClusterVersion object to tell the cluster-version operator to not retrieve updates. Failure reason {{ "{{ with $cluster_operator_conditions := \"cluster_operator_conditions\" | query}}{{range $value := .}}{{if and (eq (label \"name\" $value) \"version\") (eq (label \"condition\" $value) \"RetrievedUpdates\") (eq (label \"endpoint\" $value) \"metrics\") (eq (value $value) 0.0)}}{{label \"reason\" $value}} {{end}}{{end}}{{end}}" }}. For more information refer to `oc get clusterversion/version -o=jsonpath="{.status.conditions[?(.type=='RetrievedUpdates')]}{'\n'}"`{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+      expr: |
+        max by (namespace)
+        (
+          (
+            time()-cluster_version_operator_update_retrieval_timestamp_seconds
+          ) >= 3600
+          and ignoring(condition, name, reason)
+          (cluster_operator_conditions{name="version", condition="RetrievedUpdates", endpoint="metrics", reason!="NoChannel"})
+        )
+      labels:
+        severity: warning
+    - alert: UpdateAvailable
+      annotations:
+        summary: Your upstream update recommendation service recommends you update your cluster.
+        description: For more information refer to 'oc adm upgrade'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+      expr: |
+        sum by (channel, namespace, upstream) (cluster_version_available_updates) > 0
+      labels:
+        severity: info
+    - alert: ClusterReleaseNotAccepted
+      annotations:
+        summary: The desired cluster release has not been accepted for at least an hour.
+        description: The desired cluster release has not been accepted because {{ "{{ $labels.reason }}" }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+      expr: |
+        max by (namespace, name, reason) (cluster_operator_conditions{name="version", condition="ReleaseAccepted", endpoint="metrics"} == 0)
+      for: 60m
+      labels:
+        severity: warning
+  - name: cluster-operators
+    rules:
+    - alert: ClusterNotUpgradeable
+      annotations:
+        summary: One or more cluster operators have been blocking minor or major version cluster updates for at least an hour.
+        description: In most cases, you will still be able to apply patch releases. Reason {{ "{{ with $cluster_operator_conditions := \"cluster_operator_conditions\" | query}}{{range $value := .}}{{if and (eq (label \"name\" $value) \"version\") (eq (label \"condition\" $value) \"Upgradeable\") (eq (label \"endpoint\" $value) \"metrics\") (eq (value $value) 0.0) (ne (len (label \"reason\" $value)) 0) }}{{label \"reason\" $value}}.{{end}}{{end}}{{end}}"}} For more information refer to 'oc adm upgrade'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+      expr: |
+        max by (namespace, name, condition, endpoint) (cluster_operator_conditions{name="version", condition="Upgradeable", endpoint="metrics"} == 0)
+      for: 60m
+      labels:
+        severity: info
+    - alert: ClusterOperatorDown
+      annotations:
+        summary: Cluster operator has not been available for 10 minutes.
+        description: The {{ "{{ $labels.name }}" }} operator may be down or disabled because {{ "{{ $labels.reason }}" }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to '{{ "{{ if eq $labels.name \"version\" }}oc adm upgrade{{ else }}oc get -o yaml clusteroperator {{ $labels.name }}{{ end }}" }}'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+        runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDown.md
+      expr: |
+        max by (namespace, name, reason) (cluster_operator_up{job="cluster-version-operator"} == 0)
+      for: 10m
+      labels:
+        severity: critical
+    - alert: ClusterOperatorDegraded
+      annotations:
+        summary: Cluster operator has been degraded for 30 minutes.
+        description: The {{ "{{ $labels.name }}" }} operator is degraded because {{ "{{ $labels.reason }}" }}, and the components it manages may have reduced quality of service. Cluster upgrades may not complete. For more information refer to '{{ "{{ if eq $labels.name \"version\" }}oc adm upgrade{{ else }}oc get -o yaml clusteroperator {{ $labels.name }}{{ end }}" }}'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+        runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md
+      expr: |
+        max by (namespace, name, reason)
+        (
+          (
+            cluster_operator_conditions{job="cluster-version-operator", name!="version", condition="Degraded"}
+            or on (namespace, name)
+            cluster_operator_conditions{job="cluster-version-operator", name="version", condition="Failing"}
+            or on (namespace, name)
+            group by (namespace, name) (cluster_operator_up{job="cluster-version-operator"})
+          ) == 1
+        )
+      for: 30m
+      labels:
+        severity: warning
+    - alert: ClusterOperatorFlapping
+      annotations:
+        summary: Cluster operator up status is changing often.
+        description: The {{ "{{ $labels.name }}" }} operator behavior might cause upgrades to be unstable. For more information refer to '{{ "{{ if eq $labels.name \"version\" }}oc adm upgrade{{ else }}oc get -o yaml clusteroperator {{ $labels.name }}{{ end }}" }}'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}.
+      expr: |
+        max by (namespace, name) (changes(cluster_operator_up{job="cluster-version-operator"}[2m]) > 2)
+      for: 10m
+      labels:
+        severity: warning
+    - alert: CannotEvaluateConditionalUpdates
+      annotations:
+        summary: Cluster Version Operator cannot evaluate conditional update matches for {{ "{{ $value | humanizeDuration }}" }}.
+        description: Failure to evaluate conditional update matches means that Cluster Version Operator cannot decide whether an update path is recommended or not.
+      expr: |
+        max by (version, condition, status, reason)
+        (
+          (
+            time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+          ) >= 3600
+        )
+      labels:
+        severity: warning
diff --git a/install/0000_50_cluster-update-console-plugin_90_consoleplugin.yaml b/install/0000_50_cluster-update-console-plugin_90_consoleplugin.yaml
new file mode 100644
index 000000000..5ecd55538
--- /dev/null
+++ b/install/0000_50_cluster-update-console-plugin_90_consoleplugin.yaml
@@ -0,0 +1,23 @@
+apiVersion: console.openshift.io/v1
+kind: ConsolePlugin
+metadata:
+  name: openshift-cluster-update-console-plugin
+  annotations:
+    kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates.
+    capability.openshift.io/name: Console
+    release.openshift.io/feature-set: TechPreviewNoUpgrade
+    exclude.release.openshift.io/internal-openshift-hosted: "true"
+    include.release.openshift.io/self-managed-high-availability: "true"
+spec:
+  displayName: Cluster Updates
+  i18n:
+    loadType: Preload
+  backend:
+    type: Service
+    service:
+      # Must equal metadata.name of the Service in the plugin namespace.
+      name: openshift-cluster-update-console-plugin
+      namespace: openshift-cluster-update-console-plugin
+      # Numeric Service port; a named port such as "https" is not accepted here.
+      port: 9001
+      basePath: /
diff --git a/install/image-references b/install/image-references
new file mode 100644
index 000000000..32c17958f
--- /dev/null
+++ b/install/image-references
@@ -0,0 +1,8 @@
+kind: ImageStream
+apiVersion: image.openshift.io/v1
+spec:
+  tags:
+  - name: cluster-update-console-plugin
+    from:
+      kind: DockerImage
+      name: placeholder.url.oc.will.replace.this.example.org:cluster-update-console-plugin
diff --git a/pkg/payload/render_test.go b/pkg/payload/render_test.go
index deb475b16..b1d450d22 100644
--- a/pkg/payload/render_test.go
+++ b/pkg/payload/render_test.go
@@ -341,6 +341,10 @@ func Test_cvoManifests(t *testing.T) {
 					return nil
 				}
 
+				if _, fileName := filepath.Split(path); fileName == "image-references" {
+					return nil
+				}
+
 				var manifestsWithoutIncludeAnnotation []manifest.Manifest
 				data, err := os.ReadFile(path)
 				if err != nil {