diff --git a/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml b/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml index c572084c2ac20..796ed5801c8d3 100644 --- a/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml +++ b/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml @@ -55,6 +55,17 @@ tests: clone: true from: root run_if_changed: (SKILL\.md|^scripts/lint-skills\.py|^Makefile|^plugins/.*/skills/) +- always_run: false + as: eval-skills + optional: true + run_if_changed: ^plugins/.*/(skills|evals) + steps: + env: + EVAL_REPO_DIR: /opt/app-root/src/edge-tooling + EVAL_MODEL: claude-sonnet-4-6 + EVAL_SETUP_SCRIPT: scripts/eval-setup-ci.sh + test: + - ref: openshift-claude-agent-eval - as: ocp-ci-monitor cron: 0 7 * * 1-5 reporter_config: diff --git a/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml b/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml index 458e2d110f7b6..c4d68c0f3d419 100644 --- a/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml +++ b/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml @@ -1,5 +1,82 @@ presubmits: openshift-eng/edge-tooling: + - agent: kubernetes + always_run: false + branches: + - ^main$ + - ^main- + cluster: build12 + context: ci/prow/eval-skills + decorate: true + decoration_config: + sparse_checkout_files: + - images/Containerfile.ci + - images/Containerfile.markdownlint + labels: + ci.openshift.io/generator: prowgen + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: pull-ci-openshift-eng-edge-tooling-main-eval-skills + optional: true + rerun_command: /test eval-skills + run_if_changed: ^plugins/.*/(skills|evals) + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --target=eval-skills + command: + - ci-operator + env: + - name: HTTP_SERVER_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest + imagePullPolicy: Always + name: "" + ports: + - containerPort: 8080 + name: http + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /secrets/manifest-tool + name: manifest-tool-local-pusher + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: manifest-tool-local-pusher + secret: + secretName: manifest-tool-local-pusher + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator + trigger: (?m)^/test( | .* )eval-skills,?($|\s.*) - agent: kubernetes always_run: true branches: diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh index 288bbd6ac6913..5ae8de0a1cf2d 100755 --- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh +++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh @@ -31,22 +31,22 @@ else echo "Warning: GitHub token not found at ${GITHUB_TOKEN_PATH:-}. gh CLI will run unauthenticated." fi -# The repo is at /opt/ai-helpers; WORKDIR is /workspace -cd /opt/ai-helpers +cd "${EVAL_REPO_DIR}" echo "Config: ${EVAL_CONFIG}" echo "Skill model: ${EVAL_MODEL}" # ----------------------------------------------------------------------- -# Verify eval config exists +# Install plugins # ----------------------------------------------------------------------- -if [[ ! -f "${EVAL_CONFIG}" ]]; then - echo "ERROR: EVAL_CONFIG not found at ${EVAL_CONFIG}" - exit 1 -fi +echo "" +echo "=== Installing plugins ===" +EVAL_HARNESS_DIR="/tmp/agent-eval-harness" +git clone --depth 1 https://github.com/opendatahub-io/agent-eval-harness.git "${EVAL_HARNESS_DIR}" +echo "agent-eval-harness cloned." # ----------------------------------------------------------------------- -# Run optional setup script (e.g. extract snapshots, populate fixtures) +# Run optional setup script (e.g. extract snapshots, generate cases) # ----------------------------------------------------------------------- if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then if [[ ! -f "${EVAL_SETUP_SCRIPT}" ]]; then @@ -55,19 +55,28 @@ if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then fi echo "" echo "=== Running setup script: ${EVAL_SETUP_SCRIPT} ===" - EVAL_SNAPSHOT_DIR=$(bash "${EVAL_SETUP_SCRIPT}") - export EVAL_SNAPSHOT_DIR - echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}" + EVAL_SETUP_OUTPUT=$(bash "${EVAL_SETUP_SCRIPT}") + if [[ -n "${EVAL_SETUP_OUTPUT}" ]]; then + if [[ -f "${EVAL_SETUP_OUTPUT}" ]]; then + EVAL_CONFIG="${EVAL_SETUP_OUTPUT}" + echo "Setup script overrode EVAL_CONFIG: ${EVAL_CONFIG}" + elif [[ -d "${EVAL_SETUP_OUTPUT}" ]]; then + export EVAL_SNAPSHOT_DIR="${EVAL_SETUP_OUTPUT}" + echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}" + else + echo "ERROR: Setup script output is not a file or directory: ${EVAL_SETUP_OUTPUT}" + exit 1 + fi + fi fi # ----------------------------------------------------------------------- -# Install plugins +# Verify eval config exists # ----------------------------------------------------------------------- -echo "" -echo "=== Installing plugins ===" -EVAL_HARNESS_DIR="/tmp/agent-eval-harness" -git clone --depth 1 https://github.com/opendatahub-io/agent-eval-harness.git "${EVAL_HARNESS_DIR}" -echo "agent-eval-harness cloned." +if [[ ! -f "${EVAL_CONFIG}" ]]; then + echo "ERROR: EVAL_CONFIG not found at ${EVAL_CONFIG}" + exit 1 +fi # ----------------------------------------------------------------------- # Artifact copy trap diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml index d71924976b786..38ac24389ce8e 100644 --- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml +++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml @@ -10,6 +10,8 @@ ref: name: claude-payload-agent-github-token mount_path: /var/run/github-token env: + - name: EVAL_REPO_DIR + default: "/opt/ai-helpers" - name: EVAL_CONFIG default: "eval.yaml" - name: EVAL_MODEL