# Patch overview: switches the bin/k8s Helm chart to a Lakekeeper-backed Iceberg REST catalog.
#   - Chart.yaml: adds the `lakekeeper` chart dependency (0.9.0) with condition `lakekeeper.enabled`.
#   - files/texera_lakekeeper.sql: new symlink to ../../../sql/texera_lakekeeper.sql.
#   - templates/external-names.yaml: adds an ExternalName service `<release>-lakekeeper` in
#     $workflowComputingUnitPoolNamespace that resolves to the service in $namespace.
#   - templates/lakekeeper-init-job.yaml: new Job, rendered when `lakekeeperInit.enabled`. Its script
#     waits for `${LAKEKEEPER_BASE_URI}/health`, creates the MinIO bucket with `mc` if `mc ls` cannot
#     see it, POSTs the default project, then starts a warehouse step. `check_status` accepts
#     HTTP 2xx and 409 and exits 1 on anything else. `mc alias set` is allowed to fail the script
#     (it runs under `set -e`) so bad MinIO credentials/connectivity surface at the real cause.
#   - postgres init script (its diff header is not visible in this copy): loads
#     texera_lakekeeper.sql through `psql -U postgres` before the Texera DDL.
#   - templates/webserver-deployment.yaml: adds STORAGE_ICEBERG_CATALOG_TYPE=rest plus the REST
#     URI / warehouse name / region / S3 bucket variables.
#   - templates/workflow-computing-unit-manager-deployment.yaml: adds a `wait-lakekeeper`
#     initContainer (pinned curl image) that polls /health and then /management/v1/warehouse until
#     the configured warehouse name appears; replaces the postgres catalog variables with REST ones.
#   - values.yaml: adds `lakekeeper` and `lakekeeperInit` defaults.
# NOTE(review): this copy of the patch has lost its line breaks, and text is missing between
#   `CREATE_PAYLOAD=$(cat <` and `/tmp/texera_lakekeeper.sql` (remainder of the init Job and the
#   start of the next file's diff). Re-export the patch before applying it.
# NOTE(review): the `wait-lakekeeper` initContainer, the ExternalName service and the REST catalog
#   variables are not gated on `lakekeeper.enabled` / `lakekeeperInit.enabled`; with Lakekeeper
#   disabled the initContainer's `until` loops have no timeout — confirm this is intended.
# NOTE(review): `lakekeeper.externalDatabase` hard-codes host `texera-postgresql` and a plaintext
#   password; presumably this only matches a release named `texera` — verify against the
#   postgresql sub-chart's service name and secret.
diff --git a/bin/k8s/Chart.yaml b/bin/k8s/Chart.yaml index 1e6dcfbef83..9f6122fc3fc 100644 --- a/bin/k8s/Chart.yaml +++ b/bin/k8s/Chart.yaml @@ -59,6 +59,11 @@ dependencies: repository: oci://docker.io/envoyproxy alias: envoy-gateway + - name: lakekeeper + version: 0.9.0 + repository: https://lakekeeper.github.io/lakekeeper-charts/ + condition: lakekeeper.enabled + - name: metrics-server version: 3.12.2 repository: https://kubernetes-sigs.github.io/metrics-server/ diff --git a/bin/k8s/files/texera_lakekeeper.sql b/bin/k8s/files/texera_lakekeeper.sql new file mode 120000 index 00000000000..6ddbed93822 --- /dev/null +++ b/bin/k8s/files/texera_lakekeeper.sql @@ -0,0 +1 @@ +../../../sql/texera_lakekeeper.sql \ No newline at end of file diff --git a/bin/k8s/templates/external-names.yaml b/bin/k8s/templates/external-names.yaml index 69540067b81..691c92e0b19 100644 --- a/bin/k8s/templates/external-names.yaml +++ b/bin/k8s/templates/external-names.yaml @@ -81,4 +81,10 @@ to access services in the main namespace using the same service names. "externalName" (printf "%s-minio.%s.svc.cluster.local" .Release.Name $namespace) ) | nindent 0 }} - +--- +{{/* Lakekeeper ExternalName */}} +{{- include "external-name-service" (dict + "name" (printf "%s-lakekeeper" .Release.Name) + "namespace" $workflowComputingUnitPoolNamespace + "externalName" (printf "%s-lakekeeper.%s.svc.cluster.local" .Release.Name $namespace) +) | nindent 0 }} diff --git a/bin/k8s/templates/lakekeeper-init-job.yaml b/bin/k8s/templates/lakekeeper-init-job.yaml new file mode 100644 index 00000000000..0a3540b6055 --- /dev/null +++ b/bin/k8s/templates/lakekeeper-init-job.yaml @@ -0,0 +1,137 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{{- if .Values.lakekeeperInit.enabled }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Release.Name }}-lakekeeper-init + namespace: {{ .Release.Namespace }} +spec: + backoffLimit: 3 + template: + metadata: + name: {{ .Release.Name }}-lakekeeper-init + spec: + restartPolicy: Never + containers: + - name: lakekeeper-init + image: alpine:3.19 + env: + - name: STORAGE_S3_ENDPOINT + value: http://{{ .Release.Name }}-minio:9000 + - name: STORAGE_S3_AUTH_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-minio + key: root-user + - name: STORAGE_S3_AUTH_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-minio + key: root-password + - name: STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET + value: {{ .Values.lakekeeperInit.warehouse.s3Bucket | quote }} + - name: STORAGE_ICEBERG_CATALOG_REST_REGION + value: {{ .Values.lakekeeperInit.warehouse.region | quote }} + - name: STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME + value: {{ .Values.lakekeeperInit.warehouse.name | quote }} + - name: LAKEKEEPER_BASE_URI + value: http://{{ .Release.Name }}-lakekeeper:{{ .Values.lakekeeper.catalog.service.externalPort }} + - name: LAKEKEEPER_PROJECT_ID + value: {{ .Values.lakekeeperInit.defaultProject.id | quote }} + - name: LAKEKEEPER_PROJECT_NAME + value: {{ .Values.lakekeeperInit.defaultProject.name | quote }} + command: + - /bin/sh + - -c + - | + set -e + + apk add --no-cache curl 
ca-certificates wget + wget -q https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc + chmod +x /usr/local/bin/mc + + check_status() { + if [ "$1" -ge 200 ] && [ "$1" -lt 300 ]; then + echo "Created $2 successfully (HTTP $1)." + elif [ "$1" -eq 409 ]; then + echo "$2 already exists (HTTP 409). Treating as success." + else + echo "Failed to create $2. HTTP Code: $1" + echo "ERROR RESPONSE:" + if [ -f /tmp/response.txt ]; then cat /tmp/response.txt; fi + echo "" + exit 1 + fi + } + + echo "Waiting for Lakekeeper health endpoint..." + until curl -s -f "${LAKEKEEPER_BASE_URI}/health" > /dev/null 2>&1; do + sleep 3 + done + + echo "Step 1: Initializing MinIO bucket '${STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET}'..." + mc alias set minio "${STORAGE_S3_ENDPOINT}" "${STORAGE_S3_AUTH_USERNAME}" "${STORAGE_S3_AUTH_PASSWORD}" + if mc ls minio/${STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET} > /dev/null 2>&1; then + echo "MinIO bucket '${STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET}' already exists." + else + mc mb minio/${STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET} + echo "MinIO bucket '${STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET}' created successfully." + fi + + echo "Step 2: Initializing default project..." + PROJECT_PAYLOAD="{\"project-id\":\"${LAKEKEEPER_PROJECT_ID}\",\"project-name\":\"${LAKEKEEPER_PROJECT_NAME}\"}" + PROJECT_CODE=$(curl -s -o /tmp/response.txt -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + -d "${PROJECT_PAYLOAD}" \ + "${LAKEKEEPER_BASE_URI}/management/v1/project" || echo "000") + check_status "${PROJECT_CODE}" "Default Project" + + echo "Step 3: Initializing warehouse '${STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME}'..." + CREATE_PAYLOAD=$(cat < /tmp/texera_lakekeeper.sql +{{ .Files.Get "files/texera_lakekeeper.sql" | indent 6 }} + EOF + psql -U postgres -f /tmp/texera_lakekeeper.sql + echo "Initializing Texera database..." 
cat <<'EOF' > /tmp/texera_ddl.sql {{ .Files.Get "files/texera_ddl.sql" | indent 6 }} diff --git a/bin/k8s/templates/webserver-deployment.yaml b/bin/k8s/templates/webserver-deployment.yaml index 56642c54785..983c6269947 100644 --- a/bin/k8s/templates/webserver-deployment.yaml +++ b/bin/k8s/templates/webserver-deployment.yaml @@ -60,6 +60,17 @@ spec: secretKeyRef: name: {{ .Release.Name }}-lakefs-secret key: secret_key + # Workflow Result (Lakekeeper REST catalog) + - name: STORAGE_ICEBERG_CATALOG_TYPE + value: rest + - name: STORAGE_ICEBERG_CATALOG_REST_URI + value: http://{{ .Release.Name }}-lakekeeper:{{ .Values.lakekeeper.catalog.service.externalPort }}/catalog + - name: STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME + value: {{ .Values.lakekeeperInit.warehouse.name | quote }} + - name: STORAGE_ICEBERG_CATALOG_REST_REGION + value: {{ .Values.lakekeeperInit.warehouse.region | quote }} + - name: STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET + value: {{ .Values.lakekeeperInit.warehouse.s3Bucket | quote }} {{- range .Values.texeraEnvVars }} - name: {{ .name }} value: "{{ .value }}" diff --git a/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml b/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml index 5241d9160a3..7a0185cd465 100644 --- a/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml +++ b/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml @@ -33,6 +33,25 @@ spec: app: {{ .Release.Name }}-{{ .Values.workflowComputingUnitManager.name }} spec: serviceAccountName: {{ .Values.workflowComputingUnitManager.serviceAccountName }} + initContainers: + - name: wait-lakekeeper + image: curlimages/curl:8.10.1 + command: + - /bin/sh + - -c + - | + set -e + LAKEKEEPER_BASE_URI="http://{{ .Release.Name }}-lakekeeper:{{ .Values.lakekeeper.catalog.service.externalPort }}" + WAREHOUSE_NAME="{{ .Values.lakekeeperInit.warehouse.name }}" + echo "Waiting for Lakekeeper to become healthy..." 
+ until curl -s -f "${LAKEKEEPER_BASE_URI}/health" > /dev/null 2>&1; do + sleep 1 + done + echo "Waiting for warehouse '${WAREHOUSE_NAME}' to exist..." + until curl -s "${LAKEKEEPER_BASE_URI}/management/v1/warehouse" | grep -q "\"name\"[[:space:]]*:[[:space:]]*\"${WAREHOUSE_NAME}\""; do + sleep 1 + done + echo "Lakekeeper warehouse is ready." containers: - name: {{ .Values.workflowComputingUnitManager.name }} image: {{ .Values.texera.imageRegistry }}/{{ .Values.workflowComputingUnitManager.imageName }}:{{ .Values.texera.imageTag }} @@ -88,16 +107,15 @@ spec: key: secret_key # Workflow Result - name: STORAGE_ICEBERG_CATALOG_TYPE - value: postgres - - name: STORAGE_ICEBERG_CATALOG_POSTGRES_URI_WITHOUT_SCHEME - value: {{ .Release.Name }}-postgresql:5432/texera_iceberg_catalog - - name: STORAGE_ICEBERG_CATALOG_POSTGRES_USERNAME - value: postgres - - name: STORAGE_ICEBERG_CATALOG_POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Release.Name }}-postgresql - key: postgres-password + value: rest + - name: STORAGE_ICEBERG_CATALOG_REST_URI + value: http://{{ .Release.Name }}-lakekeeper:{{ .Values.lakekeeper.catalog.service.externalPort }}/catalog + - name: STORAGE_ICEBERG_CATALOG_REST_WAREHOUSE_NAME + value: {{ .Values.lakekeeperInit.warehouse.name | quote }} + - name: STORAGE_ICEBERG_CATALOG_REST_REGION + value: {{ .Values.lakekeeperInit.warehouse.region | quote }} + - name: STORAGE_ICEBERG_CATALOG_REST_S3_BUCKET + value: {{ .Values.lakekeeperInit.warehouse.s3Bucket | quote }} {{- range .Values.texeraEnvVars }} - name: {{ .name }} value: "{{ .value }}" diff --git a/bin/k8s/values.yaml b/bin/k8s/values.yaml index 7558591c4dd..ef9a89f9b55 100644 --- a/bin/k8s/values.yaml +++ b/bin/k8s/values.yaml @@ -113,6 +113,38 @@ lakefs: access_key_id: texera_minio secret_access_key: password +lakekeeper: + enabled: true + postgresql: + enabled: false + internalOpenFGA: false + catalog: + replicas: 1 + image: + repository: vakamo/lakekeeper + tag: v0.11.0 + pullPolicy: 
IfNotPresent + service: + externalPort: 8181 + externalDatabase: + type: postgres + host_read: texera-postgresql + host_write: texera-postgresql + port: 5432 + database: texera_lakekeeper + user: postgres + password: root_password + +lakekeeperInit: + enabled: true + defaultProject: + id: "00000000-0000-0000-0000-000000000000" + name: default + warehouse: + name: texera + region: us-west-2 + s3Bucket: texera-iceberg + # Part2: configurations of Texera-related micro services texeraImages: pullPolicy: Always