diff --git a/.github/workflows/check-readme.yml b/.github/workflows/check-readme.yml
deleted file mode 100644
index f179bda5..00000000
--- a/.github/workflows/check-readme.yml
+++ /dev/null
@@ -1,54 +0,0 @@
-name: Check README.md in Skills
-
-on:
-  pull_request:
-    branches: ["*"]
-  push:
-    branches: ["main", "master"]
-
-jobs:
-  check-readme:
-    name: Verify README.md exists in all skill directories
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Check for README.md in each skill directory
-        run: |
-          echo "Checking for README.md in all skill directories..."
-          
-          # Find all immediate subdirectories of ./skills/
-          missing_readmes=()
-          
-          for dir in ./skills/*/; do
-            # Skip if not a directory
-            [ -d "$dir" ] || continue
-            
-            # Get the directory name
-            dirname=$(basename "$dir")
-            
-            # Skip hidden directories
-            [[ "$dirname" == .* ]] && continue
-            
-            # Check if README.md exists
-            if [ ! -f "${dir}README.md" ]; then
-              missing_readmes+=("$dirname")
-            fi
-          done
-          
-          # Report results
-          if [ ${#missing_readmes[@]} -gt 0 ]; then
-            echo ""
-            echo "❌ ERROR: The following skill directories are missing README.md:"
-            echo ""
-            for skill in "${missing_readmes[@]}"; do
-              echo "  - ./skills/$skill/"
-            done
-            echo ""
-            echo "Please add a README.md file to each of the above directories."
-            exit 1
-          else
-            echo ""
-            echo "✅ All skill directories contain a README.md file."
-          fi
diff --git a/.github/workflows/pr-review-by-openhands.yml b/.github/workflows/pr-review-by-openhands.yml
index 2516c9b2..9cf05232 100644
--- a/.github/workflows/pr-review-by-openhands.yml
+++ b/.github/workflows/pr-review-by-openhands.yml
@@ -39,7 +39,7 @@ jobs:
         runs-on: ubuntu-24.04
         steps:
             - name: Run PR Review
-              uses: OpenHands/software-agent-sdk/.github/actions/pr-review@main
+              uses: OpenHands/extensions/plugins/pr-review@main
               with:
                   llm-model: litellm_proxy/claude-sonnet-4-5-20250929
                   llm-base-url: https://llm-proxy.app.all-hands.dev
diff --git a/.github/workflows/pr-review-evaluation.yml b/.github/workflows/pr-review-evaluation.yml
new file mode 100644
index 00000000..60baa50c
--- /dev/null
+++ b/.github/workflows/pr-review-evaluation.yml
@@ -0,0 +1,85 @@
+---
+name: PR Review Evaluation
+
+# This workflow evaluates how well PR review comments were addressed.
+# It runs when a PR is closed to assess review effectiveness.
+#
+# Security note: pull_request_target is safe here because:
+# 1. Only triggers on PR close (not on code changes)
+# 2. Does not checkout PR code - only downloads artifacts from trusted workflow runs
+# 3. Runs evaluation scripts from the extensions repo, not from the PR
+
+on:
+    pull_request_target:
+        types: [closed]
+
+permissions:
+    contents: read
+    pull-requests: read
+
+jobs:
+    evaluate:
+        runs-on: ubuntu-24.04
+        env:
+            PR_NUMBER: ${{ github.event.pull_request.number }}
+            REPO_NAME: ${{ github.repository }}
+            PR_MERGED: ${{ github.event.pull_request.merged }}
+
+        steps:
+            - name: Download review trace artifact
+              id: download-trace
+              uses: dawidd6/action-download-artifact@v6
+              continue-on-error: true
+              with:
+                  workflow: pr-review-by-openhands.yml
+                  name: pr-review-trace-${{ github.event.pull_request.number }}
+                  path: trace-info
+                  search_artifacts: true
+                  if_no_artifact_found: warn
+
+            - name: Check if trace file exists
+              id: check-trace
+              run: |
+                  if [ -f "trace-info/laminar_trace_info.json" ]; then
+                    echo "trace_exists=true" >> $GITHUB_OUTPUT
+                    echo "Found trace file for PR #$PR_NUMBER"
+                  else
+                    echo "trace_exists=false" >> $GITHUB_OUTPUT
+                    echo "No trace file found for PR #$PR_NUMBER - skipping evaluation"
+                  fi
+
+            # Always checkout main branch for security - cannot test script changes in PRs
+            - name: Checkout extensions repository
+              if: steps.check-trace.outputs.trace_exists == 'true'
+              uses: actions/checkout@v5
+              with:
+                  repository: OpenHands/extensions
+                  path: extensions
+
+            - name: Set up Python
+              if: steps.check-trace.outputs.trace_exists == 'true'
+              uses: actions/setup-python@v6
+              with:
+                  python-version: '3.12'
+
+            - name: Install dependencies
+              if: steps.check-trace.outputs.trace_exists == 'true'
+              run: pip install lmnr
+
+            - name: Run evaluation
+              if: steps.check-trace.outputs.trace_exists == 'true'
+              env:
+                  # Script expects LMNR_PROJECT_API_KEY; org secret is named LMNR_SKILLS_API_KEY
+                  LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_SKILLS_API_KEY }}
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+              run: |
+                  python extensions/plugins/pr-review/scripts/evaluate_review.py \
+                      --trace-file trace-info/laminar_trace_info.json
+
+            - name: Upload evaluation logs
+              uses: actions/upload-artifact@v5
+              if: always() && steps.check-trace.outputs.trace_exists == 'true'
+              with:
+                  name: pr-review-evaluation-${{ github.event.pull_request.number }}
+                  path: '*.log'
+                  retention-days: 30
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..308e5572
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,25 @@
+name: Tests
+
+on:
+  pull_request:
+    branches: ["*"]
+  push:
+    branches: ["main", "master"]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+          python-version: "3.12"
+
+      - name: Run tests
+        run: uv run --group test pytest tests/
+        env:
+          PYTHONPATH: ${{ github.workspace }}
diff --git a/plugins/pr-review/README.md b/plugins/pr-review/README.md
index 2520204c..4c457fba 100644
--- a/plugins/pr-review/README.md
+++ b/plugins/pr-review/README.md
@@ -2,6 +2,20 @@
 
 Automated pull request review using OpenHands agents. This plugin provides GitHub workflows that automatically review PRs with detailed, inline code review comments.
 
+## Quick Start
+
+Copy both workflow files to your repository:
+
+```bash
+mkdir -p .github/workflows
+curl -o .github/workflows/pr-review-by-openhands.yml \
+  https://raw.githubusercontent.com/OpenHands/extensions/main/.github/workflows/pr-review-by-openhands.yml
+curl -o .github/workflows/pr-review-evaluation.yml \
+  https://raw.githubusercontent.com/OpenHands/extensions/main/.github/workflows/pr-review-evaluation.yml
+```
+
+Then configure the required secrets (see [Installation](#installation) below).
+
 ## Features
 
 - **Automated PR Reviews**: Triggered when PRs are opened, marked ready, or when a reviewer is requested
@@ -33,16 +47,16 @@ plugins/pr-review/
 
 ## Installation
 
-### 1. Copy the Workflow File
+### 1. Copy the Workflow Files
 
-Copy the workflow file to your repository's `.github/workflows/` directory:
+Copy the workflow files to your repository's `.github/workflows/` directory:
 
 ```bash
-# Option A: Download from GitHub
+mkdir -p .github/workflows
 curl -o .github/workflows/pr-review-by-openhands.yml \
-  https://raw.githubusercontent.com/OpenHands/extensions/main/plugins/pr-review/workflows/pr-review-by-openhands.yml
-
-# Option B: Create manually (see workflow content below)
+  https://raw.githubusercontent.com/OpenHands/extensions/main/.github/workflows/pr-review-by-openhands.yml
+curl -o .github/workflows/pr-review-evaluation.yml \
+  https://raw.githubusercontent.com/OpenHands/extensions/main/.github/workflows/pr-review-evaluation.yml
 ```
 
 ### 2. Configure Secrets
@@ -53,7 +67,7 @@ Add the following secrets in your repository settings (**Settings → Secrets an
 |--------|----------|-------------|
 | `LLM_API_KEY` | Yes | API key for your LLM provider |
 | `GITHUB_TOKEN` | Auto | Provided automatically by GitHub Actions |
-| `LMNR_PROJECT_API_KEY` | No | Laminar API key for observability |
+| `LMNR_SKILLS_API_KEY` | No | Laminar API key (org-level secret; mapped to `LMNR_PROJECT_API_KEY` env var in workflows) |
 
 **Note**: For repositories that need to post review comments from a bot account, use `ALLHANDS_BOT_GITHUB_PAT` instead of `GITHUB_TOKEN`.
 
@@ -157,14 +171,7 @@ One model is randomly selected for each review. When Laminar observability is en
 
 ### Review Evaluation
 
-To evaluate how well reviews were addressed, add the evaluation workflow:
-
-```bash
-curl -o .github/workflows/pr-review-evaluation.yml \
-  https://raw.githubusercontent.com/OpenHands/extensions/main/plugins/pr-review/workflows/pr-review-evaluation.yml
-```
-
-This workflow runs when PRs are closed and:
+The evaluation workflow (`pr-review-evaluation.yml`) runs when PRs are closed and:
 1. Downloads the review trace artifact
 2. Fetches final PR state and comments
 3. Creates an evaluation span in Laminar
diff --git a/plugins/pr-review/action.yml b/plugins/pr-review/action.yml
index d4e39352..ba96e839 100644
--- a/plugins/pr-review/action.yml
+++ b/plugins/pr-review/action.yml
@@ -77,11 +77,6 @@ runs:
               sudo apt-get update
               sudo apt-get install -y gh
 
-        - name: Install OpenHands dependencies
-          shell: bash
-          run: |
-              uv pip install --system openhands-sdk openhands-tools lmnr
-
         - name: Check required configuration and select model
           id: select-model
           shell: bash
@@ -132,7 +127,8 @@ runs:
               REPO_NAME: ${{ github.repository }}
           run: |
               cd pr-repo
-              uv run python ../extensions/plugins/pr-review/scripts/agent_script.py
+              uv run --with openhands-sdk --with openhands-tools --with lmnr \
+                python ../extensions/plugins/pr-review/scripts/agent_script.py
 
         - name: Upload logs as artifact
           uses: actions/upload-artifact@v4
diff --git a/plugins/pr-review/scripts/evaluate_review.py b/plugins/pr-review/scripts/evaluate_review.py
index 896f5292..cfc59b4b 100644
--- a/plugins/pr-review/scripts/evaluate_review.py
+++ b/plugins/pr-review/scripts/evaluate_review.py
@@ -243,14 +243,17 @@ def truncate_text(text: str, max_chars: int = 50000) -> str:
     return text[:max_chars] + f"\n\n... [truncated, {len(text)} total chars]"
 
 
-def load_trace_info() -> dict:
+def load_trace_info(trace_file_path: str | None = None) -> dict:
     """Load trace info from artifact file.
 
+    Args:
+        trace_file_path: Path to trace info JSON file. If None, uses default path.
+
     Returns:
         Dictionary with trace_id, span_context, and other metadata.
         Empty dict if file not found.
     """
-    trace_info_path = Path("laminar_trace_info.json")
+    trace_info_path = Path(trace_file_path) if trace_file_path else Path("laminar_trace_info.json")
 
     if not trace_info_path.exists():
         logger.warning(
@@ -412,8 +415,12 @@ def create_evaluation_span(
     return str(eval_trace_id) if eval_trace_id else None
 
 
-def main():
-    """Run the PR review evaluation."""
+def main(trace_file_path: str | None = None):
+    """Run the PR review evaluation.
+
+    Args:
+        trace_file_path: Optional path to trace info JSON file.
+    """
     logger.info("Starting PR review evaluation...")
 
     pr_number = _get_required_env("PR_NUMBER")
@@ -423,7 +430,7 @@ def main():
     logger.info(f"Evaluating PR #{pr_number} in {repo_name}")
     logger.info(f"PR was merged: {pr_merged}")
 
-    trace_info = load_trace_info()
+    trace_info = load_trace_info(trace_file_path)
     pr_data = fetch_pr_data(repo_name, pr_number)
     eval_trace_id = create_evaluation_span(
         pr_number, repo_name, pr_merged, pr_data, trace_info
@@ -478,8 +485,17 @@ def main():
 
 
 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Evaluate PR review effectiveness")
+    parser.add_argument(
+        "--trace-file",
+        help="Path to trace info JSON file (default: laminar_trace_info.json)",
+    )
+    args = parser.parse_args()
+
     try:
-        main()
+        main(trace_file_path=args.trace_file)
     except Exception as e:
         logger.error(f"Evaluation failed: {e}")
         sys.exit(1)
diff --git a/plugins/pr-review/workflows/pr-review-by-openhands.yml b/plugins/pr-review/workflows/pr-review-by-openhands.yml
index 1efa205d..9cf05232 100644
--- a/plugins/pr-review/workflows/pr-review-by-openhands.yml
+++ b/plugins/pr-review/workflows/pr-review-by-openhands.yml
@@ -8,10 +8,9 @@ on:
     #   2. A draft PR is marked as ready for review, OR
     #   3. A maintainer adds the 'review-this' label, OR
     #   4. A maintainer requests openhands-agent or all-hands-bot as a reviewer
-    # Adding labels and requesting new reviewers requires write access. GitHub may also allow PR authors
-    # to re-request review from a previous reviewer.
+    # Adding labels and requesting reviewers requires write access.
     # The PR code is explicitly checked out for review, but secrets are only accessible
-    # because the workflow runs in the base repository context
+    # because the workflow runs in the base repository context.
     pull_request_target:
         types: [opened, ready_for_review, labeled, review_requested]
 
@@ -27,7 +26,7 @@ jobs:
         #   2. A draft PR is converted to ready for review by a non-first-time contributor, OR
         #   3. 'review-this' label is added, OR
         #   4. openhands-agent or all-hands-bot is requested as a reviewer
-        # Note: FIRST_TIME_CONTRIBUTOR PRs require manual trigger via label/reviewer request
+        # Note: FIRST_TIME_CONTRIBUTOR and NONE PRs require manual trigger via label/reviewer request.
         if: |
             (github.event.action == 'opened' && github.event.pull_request.draft == false && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' && github.event.pull_request.author_association != 'NONE') ||
             (github.event.action == 'ready_for_review' && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' && github.event.pull_request.author_association != 'NONE') ||
@@ -42,13 +41,10 @@ jobs:
             - name: Run PR Review
               uses: OpenHands/extensions/plugins/pr-review@main
               with:
-                  # LLM model(s) to use. Can be comma-separated for A/B testing
-                  # - one model will be randomly selected per review
-                  llm-model: anthropic/claude-sonnet-4-5-20250929
-                  # llm-base-url: https://llm-proxy.app.all-hands.dev
+                  llm-model: litellm_proxy/claude-sonnet-4-5-20250929
+                  llm-base-url: https://llm-proxy.app.all-hands.dev
                   # Review style: roasted (other option: standard)
                   review-style: roasted
                   llm-api-key: ${{ secrets.LLM_API_KEY }}
-                  github-token: ${{ secrets.GITHUB_TOKEN }}
-                  # Optional: Laminar API key for observability
-                  # lmnr-api-key: ${{ secrets.LMNR_PROJECT_API_KEY }}
+                  github-token: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }}
+                  lmnr-api-key: ${{ secrets.LMNR_SKILLS_API_KEY }}
diff --git a/plugins/pr-review/workflows/pr-review-evaluation.yml b/plugins/pr-review/workflows/pr-review-evaluation.yml
index 446df722..60baa50c 100644
--- a/plugins/pr-review/workflows/pr-review-evaluation.yml
+++ b/plugins/pr-review/workflows/pr-review-evaluation.yml
@@ -1,15 +1,13 @@
 ---
 name: PR Review Evaluation
 
-# This workflow runs when a PR is merged or closed to evaluate how well
-# the review agent's comments were addressed.
+# This workflow evaluates how well PR review comments were addressed.
+# It runs when a PR is closed to assess review effectiveness.
 #
-# It creates an evaluation trace in Laminar that can be processed by a
-# signal to determine review effectiveness.
-#
-# Prerequisites:
-# - PR must have been reviewed by pr-review-by-openhands.yml first
-# - Trace info artifact must exist from the review workflow
+# Security note: pull_request_target is safe here because:
+# 1. Only triggers on PR close (not on code changes)
+# 2. Does not checkout PR code - only downloads artifacts from trusted workflow runs
+# 3. Runs evaluation scripts from the extensions repo, not from the PR
 
 on:
     pull_request_target:
@@ -21,9 +19,6 @@ permissions:
 
 jobs:
     evaluate:
-        # Only run if:
-        # 1. This is a merged PR, AND
-        # 2. The PR was previously reviewed (has the trace artifact)
         runs-on: ubuntu-24.04
         env:
             PR_NUMBER: ${{ github.event.pull_request.number }}
@@ -31,8 +26,6 @@ jobs:
             PR_MERGED: ${{ github.event.pull_request.merged }}
 
         steps:
-            # Note: actions/download-artifact@v5 only works within the same workflow run.
-            # We use dawidd6/action-download-artifact to download from a different workflow.
             - name: Download review trace artifact
               id: download-trace
               uses: dawidd6/action-download-artifact@v6
@@ -44,8 +37,6 @@ jobs:
                   search_artifacts: true
                   if_no_artifact_found: warn
 
-            # Check if the trace file actually exists (the artifact download may
-            # succeed but with no matching artifact, only issuing a warning)
             - name: Check if trace file exists
               id: check-trace
               run: |
@@ -54,10 +45,10 @@ jobs:
                     echo "Found trace file for PR #$PR_NUMBER"
                   else
                     echo "trace_exists=false" >> $GITHUB_OUTPUT
-                    echo "No trace file found for PR #$PR_NUMBER"
-                    echo "This PR may not have been reviewed by the agent, skipping evaluation"
+                    echo "No trace file found for PR #$PR_NUMBER - skipping evaluation"
                   fi
 
+            # Always checkout main branch for security - cannot test script changes in PRs
             - name: Checkout extensions repository
               if: steps.check-trace.outputs.trace_exists == 'true'
               uses: actions/checkout@v5
@@ -69,38 +60,26 @@ jobs:
               if: steps.check-trace.outputs.trace_exists == 'true'
               uses: actions/setup-python@v6
               with:
-                  python-version: '3.13'
-
-            - name: Install uv
-              if: steps.check-trace.outputs.trace_exists == 'true'
-              uses: astral-sh/setup-uv@v7
-              with:
-                  enable-cache: true
+                  python-version: '3.12'
 
             - name: Install dependencies
               if: steps.check-trace.outputs.trace_exists == 'true'
-              run: |
-                  # Install lmnr SDK for Laminar integration
-                  uv pip install --system lmnr
+              run: pip install lmnr
 
             - name: Run evaluation
               if: steps.check-trace.outputs.trace_exists == 'true'
               env:
-                  LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_PROJECT_API_KEY }}
+                  # Script expects LMNR_PROJECT_API_KEY; org secret is named LMNR_SKILLS_API_KEY
+                  LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_SKILLS_API_KEY }}
                   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
               run: |
-                  # Copy trace info to working directory
-                  cp trace-info/laminar_trace_info.json .
-
-                  # Run the evaluation script
-                  uv run python extensions/plugins/pr-review/scripts/evaluate_review.py
+                  python extensions/plugins/pr-review/scripts/evaluate_review.py \
+                      --trace-file trace-info/laminar_trace_info.json
 
             - name: Upload evaluation logs
               uses: actions/upload-artifact@v5
               if: always() && steps.check-trace.outputs.trace_exists == 'true'
               with:
                   name: pr-review-evaluation-${{ github.event.pull_request.number }}
-                  path: |
-                      *.log
-                      *.json
+                  path: '*.log'
                   retention-days: 30
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..a98a3801
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "extensions"
+version = "0.1.0"
+description = "OpenHands extensions, plugins, and skills"
+requires-python = ">=3.12"
+
+[dependency-groups]
+test = [
+    "pytest>=8.0",
+    "requests>=2.31",
+]
diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py
new file mode 100644
index 00000000..1373bcda
--- /dev/null
+++ b/tests/test_workflow_sync.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Test that workflow files in .github/workflows/ match copies in plugins/*/workflows/."""
+
+from pathlib import Path
+
+
+def test_workflow_files_are_in_sync():
+    """Ensure workflow files in .github/workflows/ are identical to copies in plugin dirs."""
+    repo_root = Path(__file__).parent.parent
+    workflows_dir = repo_root / ".github" / "workflows"
+
+    if not workflows_dir.exists():
+        return  # No workflows directory
+
+    mismatches = []
+    # Check all plugins/*/workflows/ directories
+    for copy_path in repo_root.glob("plugins/*/workflows"):
+        if not copy_path.is_dir():
+            continue
+
+        for copy_file in copy_path.glob("*.yml"):
+            canonical_file = workflows_dir / copy_file.name
+            if not canonical_file.exists():
+                continue
+
+            canonical_content = canonical_file.read_text()
+            copy_content = copy_file.read_text()
+
+            if canonical_content != copy_content:
+                rel_path = copy_path.relative_to(repo_root)
+                mismatches.append(
+                    f"{rel_path}/{copy_file.name} differs from "
+                    f".github/workflows/{copy_file.name}"
+                )
+
+    if mismatches:
+        raise AssertionError(
+            "Workflow files are out of sync:\n"
+            + "\n".join(f"  - {m}" for m in mismatches)
+            + "\n\nRun: cp .github/workflows/<file> <destination> to sync"
+        )
+
+
+if __name__ == "__main__":
+    test_workflow_files_are_in_sync()
+    print("All workflow files are in sync!")