lexmount · waple0820 · May 11, 2026 · May 11, 2026
diff --git a/.github/workflows/claude.yml → .github/workflows/claude.yml.disabled b/.github/workflows/claude.yml → .github/workflows/claude.yml.disabled
diff --git a/.github/workflows/cloud_evals.yml → .github/workflows/cloud_evals.yml.disabled b/.github/workflows/cloud_evals.yml → .github/workflows/cloud_evals.yml.disabled
diff --git a/.github/workflows/docker.yml → .github/workflows/docker.yml.disabled b/.github/workflows/docker.yml → .github/workflows/docker.yml.disabled
diff --git a/.github/workflows/eval-on-pr.yml → .github/workflows/eval-on-pr.yml.disabled b/.github/workflows/eval-on-pr.yml → .github/workflows/eval-on-pr.yml.disabled
diff --git a/.github/workflows/package.yaml → .github/workflows/package.yaml.disabled b/.github/workflows/package.yaml → .github/workflows/package.yaml.disabled
diff --git a/.github/workflows/publish.yml → .github/workflows/publish.yml.disabled b/.github/workflows/publish.yml → .github/workflows/publish.yml.disabled
diff --git a/.github/workflows/stale-bot.yml → .github/workflows/stale-bot.yml.disabled b/.github/workflows/stale-bot.yml → .github/workflows/stale-bot.yml.disabled
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -1,11 +1,6 @@
 name: test
 permissions:
-  actions: read
-  contents: write
-  pull-requests: write  # Allow writing comments on PRs
-  issues: write         # Allow writing comments on issues
-  statuses: write       # Allow writing statuses on PRs
-  discussions: write
+  contents: read  # Least privilege: read-only access to repository contents
 
 # Cancel in-progress runs when a new commit is pushed to the same branch/PR
 concurrency:
@@ -86,15 +81,6 @@ jobs:
       IN_DOCKER: 'True'
       ANONYMIZED_TELEMETRY: 'false'
       BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
-      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-      AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
-      AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
-      BROWSER_USE_API_KEY: ${{ secrets.BROWSER_USE_API_KEY }}
-      OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
     strategy:
       matrix:
         test_filename: ${{ fromJson(needs.find_tests.outputs.TEST_FILENAMES || '["FAILED_TO_DISCOVER_TESTS"]') }}
@@ -180,158 +166,3 @@ jobs:
           max_attempts: 1
           retry_on: error
           command: pytest "tests/ci/${{ matrix.test_filename }}.py"
-
-  evaluate-tasks:
-    needs: setup-chromium
-    runs-on: ubuntu-latest
-    timeout-minutes: 8  # Allow more time for agent eval
-    env:
-      IN_DOCKER: 'true'
-      BROWSER_USE_CLOUD_SYNC: 'false'
-      ANONYMIZED_TELEMETRY: 'false'
-      BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
-      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-      PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
-      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
-      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-      BROWSER_USE_API_KEY: ${{ secrets.BROWSER_USE_API_KEY }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: astral-sh/setup-uv@v6
-        with:
-          enable-cache: true
-          activate-environment: true
-
-      - name: Cache uv packages and venv
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.cache/uv
-            .venv
-          key: ${{ runner.os }}-uv-venv-${{ hashFiles('pyproject.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-uv-venv-
-
-      - run: uv sync --dev --all-extras
-
-      - name: Get week number for cache key
-        id: week
-        run: echo "number=$(date +%Y-W%U)" >> $GITHUB_OUTPUT
-
-      - name: Cache chromium binaries
-        id: cache-chromium
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.cache/ms-playwright
-          key: ${{ runner.os }}-${{ runner.arch }}-chromium-${{ steps.week.outputs.number }}
-          restore-keys: |
-            ${{ runner.os }}-${{ runner.arch }}-chromium-
-
-      - name: Install Chromium browser if not cached
-        if: steps.cache-chromium.outputs.cache-hit != 'true'
-        run: uvx playwright install chromium --with-deps --no-shell
-
-      - name: Cache browser-use extensions
-        uses: actions/cache@v4
-        with:
-          path: |
-            ~/.config/browseruse/extensions
-          key: ${{ runner.os }}-browseruse-extensions-${{ hashFiles('browser_use/browser/profile.py') }}
-          restore-keys: |
-            ${{ runner.os }}-browseruse-extensions-
-
-      - name: Run agent tasks evaluation and capture score
-        id: eval
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 4
-          max_attempts: 1
-          retry_on: error
-          command: |
-            python tests/ci/evaluate_tasks.py > result.txt
-            cat result.txt
-            echo "PASSED=$(grep '^PASSED=' result.txt | cut -d= -f2)" >> $GITHUB_ENV
-            echo "TOTAL=$(grep '^TOTAL=' result.txt | cut -d= -f2)" >> $GITHUB_ENV
-            echo "DETAILED_RESULTS=$(grep '^DETAILED_RESULTS=' result.txt | cut -d= -f2-)" >> $GITHUB_ENV
-
-      - name: Print agent evaluation summary
-        run: |
-          echo "Agent tasks passed: $PASSED / $TOTAL"
-
-      - name: Write agent evaluation summary to workflow overview
-        run: |
-          if [ "$PASSED" = "$TOTAL" ]; then
-            COLOR="green"
-          else
-            COLOR="yellow"
-          fi
-          echo "<h2>Agent Tasks Score: <span style='color:$COLOR;'>$PASSED/$TOTAL</span></h2>" >> $GITHUB_STEP_SUMMARY
-
-      - name: Comment PR with agent evaluation results
-        if: github.event_name == 'pull_request'
-        uses: actions/github-script@v7
-        continue-on-error: true
-        with:
-          script: |
-            const passed = parseInt(process.env.PASSED);
-            const total = parseInt(process.env.TOTAL);
-            const detailedResults = JSON.parse(process.env.DETAILED_RESULTS);
-            const score = `${passed}/${total}`;
-            const percentage = Math.round((passed / total) * 100);
-
-            // Fail the workflow if 0% pass rate
-            if (percentage === 0) {
-              core.setFailed(`Evaluation failed: 0% pass rate (${passed}/${total})`);
-            }
-
-            // Create detailed table
-            let tableRows = '';
-            detailedResults.forEach(result => {
-              const emoji = result.success ? '✅' : '❌';
-              const status = result.success ? 'Pass' : 'Fail';
-              tableRows += `| ${result.task} | ${emoji} ${status} | ${result.reason} |\n`;
-            });
-
-            const comment = `## Agent Task Evaluation Results: ${score} (${percentage}%)
-
-            <details>
-            <summary>View detailed results</summary>
-
-            | Task | Result | Reason |
-            |------|--------|--------|
-            ${tableRows}
-
-            Check the [evaluate-tasks job](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detailed task execution logs.
-            </details>`;
-
-            // Find existing comment to update or create new one
-            const { data: comments } = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number: context.issue.number,
-            });
-
-            const botComment = comments.find(comment =>
-              comment.user.type === 'Bot' &&
-              comment.body.includes('Agent Task Evaluation Results')
-            );
-
-            if (botComment) {
-              // Update existing comment
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: botComment.id,
-                body: comment
-              });
-            } else {
-              // Create new comment
-              await github.rest.issues.createComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: context.issue.number,
-                body: comment
-              });
-            }