From 19f82bf399f82811438e1d22d1c7ce5eb6464f6c Mon Sep 17 00:00:00 2001
From: Douglas Hensel <dhensel@redhat.com>
Date: Fri, 26 Jun 2026 15:04:54 -0400
Subject: [PATCH 1/2] OCPEDGE-2727: Add eval-skills presubmit and enhance
 agent-eval ref
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add EVAL_REPO_DIR env var to ref (backward compatible, defaults to
  /opt/ai-helpers)
- Reorder commands.sh: plugins install → setup script → config check
- Let the setup script's stdout either override EVAL_CONFIG (when it is
  the path of an existing file) or set EVAL_SNAPSHOT_DIR (any other
  non-empty output)
- Add eval-skills presubmit for edge-tooling (optional, triggers on
  plugins skill/eval changes)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../openshift-eng-edge-tooling-main.yaml      | 11 +++
 ...hift-eng-edge-tooling-main-presubmits.yaml | 77 +++++++++++++++++++
 .../openshift-claude-agent-eval-commands.sh   | 40 ++++++----
 .../openshift-claude-agent-eval-ref.yaml      |  2 +
 4 files changed, 113 insertions(+), 17 deletions(-)

diff --git a/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml b/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml
index c572084c2ac20..796ed5801c8d3 100644
--- a/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml
+++ b/ci-operator/config/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main.yaml
@@ -55,6 +55,17 @@ tests:
     clone: true
     from: root
   run_if_changed: (SKILL\.md|^scripts/lint-skills\.py|^Makefile|^plugins/.*/skills/)
+- always_run: false
+  as: eval-skills
+  optional: true
+  run_if_changed: ^plugins/.*/(skills|evals)
+  steps:
+    env:
+      EVAL_REPO_DIR: /opt/app-root/src/edge-tooling
+      EVAL_MODEL: claude-sonnet-4-6
+      EVAL_SETUP_SCRIPT: scripts/eval-setup-ci.sh
+    test:
+    - ref: openshift-claude-agent-eval
 - as: ocp-ci-monitor
   cron: 0 7 * * 1-5
   reporter_config:
diff --git a/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml b/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml
index 458e2d110f7b6..c4d68c0f3d419 100644
--- a/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml
+++ b/ci-operator/jobs/openshift-eng/edge-tooling/openshift-eng-edge-tooling-main-presubmits.yaml
@@ -1,5 +1,82 @@
 presubmits:
   openshift-eng/edge-tooling:
+  - agent: kubernetes
+    always_run: false
+    branches:
+    - ^main$
+    - ^main-
+    cluster: build12
+    context: ci/prow/eval-skills
+    decorate: true
+    decoration_config:
+      sparse_checkout_files:
+      - images/Containerfile.ci
+      - images/Containerfile.markdownlint
+    labels:
+      ci.openshift.io/generator: prowgen
+      pj-rehearse.openshift.io/can-be-rehearsed: "true"
+    name: pull-ci-openshift-eng-edge-tooling-main-eval-skills
+    optional: true
+    rerun_command: /test eval-skills
+    run_if_changed: ^plugins/.*/(skills|evals)
+    spec:
+      containers:
+      - args:
+        - --gcs-upload-secret=/secrets/gcs/service-account.json
+        - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+        - --lease-server-credentials-file=/etc/boskos/credentials
+        - --report-credentials-file=/etc/report/credentials
+        - --target=eval-skills
+        command:
+        - ci-operator
+        env:
+        - name: HTTP_SERVER_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+        image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+        imagePullPolicy: Always
+        name: ""
+        ports:
+        - containerPort: 8080
+          name: http
+        resources:
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /etc/boskos
+          name: boskos
+          readOnly: true
+        - mountPath: /secrets/gcs
+          name: gcs-credentials
+          readOnly: true
+        - mountPath: /secrets/manifest-tool
+          name: manifest-tool-local-pusher
+          readOnly: true
+        - mountPath: /etc/pull-secret
+          name: pull-secret
+          readOnly: true
+        - mountPath: /etc/report
+          name: result-aggregator
+          readOnly: true
+      serviceAccountName: ci-operator
+      volumes:
+      - name: boskos
+        secret:
+          items:
+          - key: credentials
+            path: credentials
+          secretName: boskos-credentials
+      - name: manifest-tool-local-pusher
+        secret:
+          secretName: manifest-tool-local-pusher
+      - name: pull-secret
+        secret:
+          secretName: registry-pull-credentials
+      - name: result-aggregator
+        secret:
+          secretName: result-aggregator
+    trigger: (?m)^/test( | .* )eval-skills,?($|\s.*)
   - agent: kubernetes
     always_run: true
     branches:
diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh
index 288bbd6ac6913..bc73a15db87f8 100755
--- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh
+++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh
@@ -31,22 +31,22 @@ else
     echo "Warning: GitHub token not found at ${GITHUB_TOKEN_PATH:-<unset>}. gh CLI will run unauthenticated."
 fi
 
-# The repo is at /opt/ai-helpers; WORKDIR is /workspace
-cd /opt/ai-helpers
+cd "${EVAL_REPO_DIR}"
 
 echo "Config: ${EVAL_CONFIG}"
 echo "Skill model: ${EVAL_MODEL}"
 
 # -----------------------------------------------------------------------
-# Verify eval config exists
+# Install plugins
 # -----------------------------------------------------------------------
-if [[ ! -f "${EVAL_CONFIG}" ]]; then
-    echo "ERROR: EVAL_CONFIG not found at ${EVAL_CONFIG}"
-    exit 1
-fi
+echo ""
+echo "=== Installing plugins ==="
+EVAL_HARNESS_DIR="/tmp/agent-eval-harness"
+git clone --depth 1 https://github.com/opendatahub-io/agent-eval-harness.git "${EVAL_HARNESS_DIR}"
+echo "agent-eval-harness cloned."
 
 # -----------------------------------------------------------------------
-# Run optional setup script (e.g. extract snapshots, populate fixtures)
+# Run optional setup script (e.g. extract snapshots, generate cases)
 # -----------------------------------------------------------------------
 if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then
     if [[ ! -f "${EVAL_SETUP_SCRIPT}" ]]; then
@@ -55,19 +55,25 @@ if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then
     fi
     echo ""
     echo "=== Running setup script: ${EVAL_SETUP_SCRIPT} ==="
-    EVAL_SNAPSHOT_DIR=$(bash "${EVAL_SETUP_SCRIPT}")
-    export EVAL_SNAPSHOT_DIR
-    echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}"
+    EVAL_SETUP_OUTPUT=$(bash "${EVAL_SETUP_SCRIPT}")
+    if [[ -n "${EVAL_SETUP_OUTPUT}" ]]; then
+        if [[ -f "${EVAL_SETUP_OUTPUT}" ]]; then
+            EVAL_CONFIG="${EVAL_SETUP_OUTPUT}"
+            echo "Setup script overrode EVAL_CONFIG: ${EVAL_CONFIG}"
+        else
+            export EVAL_SNAPSHOT_DIR="${EVAL_SETUP_OUTPUT}"
+            echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}"
+        fi
+    fi
 fi
 
 # -----------------------------------------------------------------------
-# Install plugins
+# Verify eval config exists
 # -----------------------------------------------------------------------
-echo ""
-echo "=== Installing plugins ==="
-EVAL_HARNESS_DIR="/tmp/agent-eval-harness"
-git clone --depth 1 https://github.com/opendatahub-io/agent-eval-harness.git "${EVAL_HARNESS_DIR}"
-echo "agent-eval-harness cloned."
+if [[ ! -f "${EVAL_CONFIG}" ]]; then
+    echo "ERROR: EVAL_CONFIG not found at ${EVAL_CONFIG}"
+    exit 1
+fi
 
 # -----------------------------------------------------------------------
 # Artifact copy trap
diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml
index d71924976b786..38ac24389ce8e 100644
--- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml
+++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-ref.yaml
@@ -10,6 +10,8 @@ ref:
     name: claude-payload-agent-github-token
     mount_path: /var/run/github-token
   env:
+  - name: EVAL_REPO_DIR
+    default: "/opt/ai-helpers"
   - name: EVAL_CONFIG
     default: "eval.yaml"
   - name: EVAL_MODEL

From 16dd02b172dc8102f49678d35f2b0bb77c53cfbd Mon Sep 17 00:00:00 2001
From: Douglas Hensel <dhensel@redhat.com>
Date: Fri, 26 Jun 2026 15:46:40 -0400
Subject: [PATCH 2/2] Fail fast on invalid setup-script output (CodeRabbit
 review feedback)

---
 .../agent-eval/openshift-claude-agent-eval-commands.sh       | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh
index bc73a15db87f8..5ae8de0a1cf2d 100755
--- a/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh
+++ b/ci-operator/step-registry/openshift/claude/agent-eval/openshift-claude-agent-eval-commands.sh
@@ -60,9 +60,12 @@ if [[ -n "${EVAL_SETUP_SCRIPT}" ]]; then
         if [[ -f "${EVAL_SETUP_OUTPUT}" ]]; then
             EVAL_CONFIG="${EVAL_SETUP_OUTPUT}"
             echo "Setup script overrode EVAL_CONFIG: ${EVAL_CONFIG}"
-        else
+        elif [[ -d "${EVAL_SETUP_OUTPUT}" ]]; then
             export EVAL_SNAPSHOT_DIR="${EVAL_SETUP_OUTPUT}"
             echo "Snapshot dir: ${EVAL_SNAPSHOT_DIR}"
+        else
+            echo "ERROR: Setup script output is not a file or directory: ${EVAL_SETUP_OUTPUT}"
+            exit 1
         fi
     fi
 fi