From 19f82bf399f82811438e1d22d1c7ce5eb6464f6c Mon Sep 17 00:00:00 2001 From: Douglas Hensel Date: Fri, 26 Jun 2026 15:04:54 -0400 Subject: [PATCH 1/2] OCPEDGE-2727: Add eval-skills presubmit and enhance agent-eval ref MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add EVAL_REPO_DIR env var to ref (backward compatible, defaults to /opt/ai-helpers) - Reorder commands.sh: plugins install → setup script → config check - Support setup script overriding EVAL_CONFIG or setting EVAL_SNAPSHOT_DIR - Add eval-skills presubmit for edge-tooling (optional, triggers on plugins skill/eval changes) Co-Authored-By: Claude Opus 4.6 --- .../openshift-eng-edge-tooling-main.yaml | 11 +++ ...hift-eng-edge-tooling-main-presubmits.yaml | 77 +++++++++++++++++++ .../openshift-claude-agent-eval-commands.sh | 40 ++++++---- .../openshift-claude-agent-eval-ref.yaml | 2 + 4 files changed, 113 insertions(+), 17 deletions(-) diff --git a/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml b/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml index c572084c2ac20..796ed5801c8d3 100644 --- a/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml +++ b/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml @@ -55,6 +55,17 @@ tests: clone: true from: root run_if_changed: (SKILL\.md|^scripts/lint-skills\.py|^Makefile|^plugins/.*/skills/) +- always_run: false + as: eval-skills + optional: true + run_if_changed: ^plugins/.*/(skills|evals) + steps: + env: + EVAL_REPO_DIR: /opt/app-root/src/edge-tooling + EVAL_MODEL: claude-sonnet-4-6 + EVAL_SETUP_SCRIPT: scripts/eval-setup-ci.sh + test: + - ref: openshift-claude-agent-eval - as: ocp-ci-monitor cron: 0 7 * * 1-5 reporter_config: diff --git a/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml 
b/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml index 458e2d110f7b6..c4d68c0f3d419 100644 --- a/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml +++ b/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml @@ -1,5 +1,82 @@ presubmits: openshift-eng/edge-tooling: + - agent: kubernetes + always_run: false + branches: + - ^main$ + - ^main- + cluster: build12 + context: ci/prow/eval-skills + decorate: true + decoration_config: + sparse_checkout_files: + - images/Containerfile.ci + - images/Containerfile.markdownlint + labels: + ci.openshift.io/generator: prowgen + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: pull-ci-openshift-eng-edge-tooling-main-eval-skills + optional: true + rerun_command: /test eval-skills + run_if_changed: ^plugins/.*/(skills|evals) + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --target=eval-skills + command: + - ci-operator + env: + - name: HTTP_SERVER_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + 
secretName: boskos-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator + trigger: (?m)^/test( | .* )eval-skills,?($|\s.*) - agent: kubernetes always_run: true branches: diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh index 288bbd6ac6913..bc73a15db87f8 100755 --- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh +++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh @@ -31,22 +31,22 @@ else echo "Warning: GitHub token not found at ${GITHUB_TOKEN_PATH:-}. gh CLI will run unauthenticated." fi -# The repo is at /opt/ai-helpers; WORKDIR is /workspace -cd /opt/ai-helpers +cd "${EVAL_REPO_DIR}" echo "Config: ${EVAL_CONFIG}" echo "Skill model: ${EVAL_MODEL}" # ----------------------------------------------------------------------- -# Verify eval config exists +# Install plugins # ----------------------------------------------------------------------- -if [[ ! -f "${EVAL_CONFIG}" ]]; then - echo "ERROR: EVAL_CONFIG not found at ${EVAL_CONFIG}" - exit 1 -fi +echo "" +echo "=== Installing plugins ===" +EVAL_HARNESS_DIR="/tmp/agent-eval-harness" +git clone --depth 1 https://github.com/opendatahub-io/agent-eval-harness.git "${EVAL_HARNESS_DIR}" +echo "agent-eval-harness cloned." # ----------------------------------------------------------------------- -# Run optional setup script (e.g. extract snapshots, populate fixtures) +# Run optional setup script (e.g. extract snapshots, generate cases) # ----------------------------------------------------------------------- if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then if [[ ! 
-f "${EVAL_SETUP_SCRIPT}" ]]; then @@ -55,19 +55,25 @@ if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then fi echo "" echo "=== Running setup script: ${EVAL_SETUP_SCRIPT} ===" - EVAL_SNAPSHOT_DIR=$(bash "${EVAL_SETUP_SCRIPT}") - export EVAL_SNAPSHOT_DIR - echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}" + EVAL_SETUP_OUTPUT=$(bash "${EVAL_SETUP_SCRIPT}") + if [[ -n "${EVAL_SETUP_OUTPUT}" ]]; then + if [[ -f "${EVAL_SETUP_OUTPUT}" ]]; then + EVAL_CONFIG="${EVAL_SETUP_OUTPUT}" + echo "Setup script overrode EVAL_CONFIG: ${EVAL_CONFIG}" + else + export EVAL_SNAPSHOT_DIR="${EVAL_SETUP_OUTPUT}" + echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}" + fi + fi fi # ----------------------------------------------------------------------- -# Install plugins +# Verify eval config exists # ----------------------------------------------------------------------- -echo "" -echo "=== Installing plugins ===" -EVAL_HARNESS_DIR="/tmp/agent-eval-harness" -git clone --depth 1 https://github.com/opendatahub-io/agent-eval-harness.git "${EVAL_HARNESS_DIR}" -echo "agent-eval-harness cloned." +if [[ ! 
-f "${EVAL_CONFIG}" ]]; then + echo "ERROR: EVAL_CONFIG not found at ${EVAL_CONFIG}" + exit 1 +fi # ----------------------------------------------------------------------- # Artifact copy trap diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml index d71924976b786..38ac24389ce8e 100644 --- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml +++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml @@ -10,6 +10,8 @@ ref: name: claude-payload-agent-github-token mount_path: /var/run/github-token env: + - name: EVAL_REPO_DIR + default: "/opt/ai-helpers" - name: EVAL_CONFIG default: "eval.yaml" - name: EVAL_MODEL From 16dd02b172dc8102f49678d35f2b0bb77c53cfbd Mon Sep 17 00:00:00 2001 From: Douglas Hensel Date: Fri, 26 Jun 2026 15:46:40 -0400 Subject: [PATCH 2/2] Fail fast on invalid setup-script output Address CodeRabbit review feedback: exit with an error when the setup script's output is neither a file nor a directory, instead of exporting it as EVAL_SNAPSHOT_DIR.
--- .../agent-eval/openshift-claude-agent-eval-commands.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh index bc73a15db87f8..5ae8de0a1cf2d 100755 --- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh +++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh @@ -60,9 +60,12 @@ if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then if [[ -f "${EVAL_SETUP_OUTPUT}" ]]; then EVAL_CONFIG="${EVAL_SETUP_OUTPUT}" echo "Setup script overrode EVAL_CONFIG: ${EVAL_CONFIG}" - else + elif [[ -d "${EVAL_SETUP_OUTPUT}" ]]; then export EVAL_SNAPSHOT_DIR="${EVAL_SETUP_OUTPUT}" echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}" + else + echo "ERROR: Setup script output is not a file or directory: ${EVAL_SETUP_OUTPUT}" + exit 1 fi fi fi