From 3a9b4b044afd023d2e2c194d7b6f7618915b0ac2 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 28 Apr 2026 02:04:38 +0000
Subject: [PATCH 1/3] Automate LKG updates in nightly regression workflow

- Granted `contents: write` and `pull-requests: write` permissions in the caller workflow.
- Updated `.github/workflows/tpu-nightly-regression.yml` to extract the exact commits of the installed `vllm` and `tpu-inference` packages and to automatically open a PR if those commits differ from the ones pinned in the requirements files.

Co-authored-by: sizhit2 <32147610+sizhit2@users.noreply.github.com>
---
 ...uild_and_test_tunix_nightly_regression.yml |  3 +-
 .github/workflows/tpu-nightly-regression.yml  | 87 +++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_and_test_tunix_nightly_regression.yml b/.github/workflows/build_and_test_tunix_nightly_regression.yml
index 20abaabb6..bc1fac376 100644
--- a/.github/workflows/build_and_test_tunix_nightly_regression.yml
+++ b/.github/workflows/build_and_test_tunix_nightly_regression.yml
@@ -32,7 +32,8 @@ concurrency:
   cancel-in-progress: false
 
 permissions:
-  contents: read
+  contents: write
+  pull-requests: write
 jobs:
   build_tunix_package:
     name: Build tunix package
diff --git a/.github/workflows/tpu-nightly-regression.yml b/.github/workflows/tpu-nightly-regression.yml
index 15537db4f..63f611e24 100644
--- a/.github/workflows/tpu-nightly-regression.yml
+++ b/.github/workflows/tpu-nightly-regression.yml
@@ -35,6 +35,9 @@ env:
 
 jobs:
   run_prod:
+    outputs:
+      vllm_commit: ${{ steps.extract_commits.outputs.vllm_commit }}
+      tpu_inference_commit: ${{ steps.extract_commits.outputs.tpu_inference_commit }}
     runs-on: [linux-x86-ct5lp-224-8tpu]
     environment: testing
     container:
@@ -220,3 +223,87 @@ jobs:
         fi
         echo "🎉 All RL scripts completed successfully."
 
+    - name: Extract dependency commits
+      id: extract_commits
+      run: |
+        cat << 'EOF' > get_commits.py
+        import sys
+
+        try:
+            from importlib.metadata import requires, version
+        except ImportError:
+            import pkg_resources
+
+        def get_commit(package_name):
+            try:
+                import importlib.metadata
+                dist = importlib.metadata.distribution(package_name)
+                # the dist.read_text('direct_url.json') usually contains the exact git commit for git dependencies
+                import json
+                direct_url = dist.read_text('direct_url.json')
+                if direct_url:
+                    data = json.loads(direct_url)
+                    if 'vcs_info' in data and 'commit_id' in data['vcs_info']:
+                        return data['vcs_info']['commit_id']
+            except Exception as e:
+                pass
+            return None
+
+        vllm_commit = get_commit("vllm")
+        tpu_inference_commit = get_commit("tpu-inference")
+
+        with open(r"${{ github.output }}", "a") as f:
+            if vllm_commit:
+                f.write(f"vllm_commit={vllm_commit}\n")
+            if tpu_inference_commit:
+                f.write(f"tpu_inference_commit={tpu_inference_commit}\n")
+        EOF
+        sed -i "s|\${{ github.output }}|$GITHUB_OUTPUT|g" get_commits.py
+        python3 get_commits.py
+
+  update_lkg_pr:
+    needs: run_prod
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Update requirement files
+        run: |
+          VLLM_COMMIT="${{ needs.run_prod.outputs.vllm_commit }}"
+          TPU_INFERENCE_COMMIT="${{ needs.run_prod.outputs.tpu_inference_commit }}"
+
+          if [ -n "$VLLM_COMMIT" ]; then
+            sed -i "s|vllm-project/vllm\.git@[a-f0-9]\+|vllm-project/vllm.git@${VLLM_COMMIT}|g" requirements/requirements.txt
+          fi
+
+          if [ -n "$TPU_INFERENCE_COMMIT" ]; then
+            sed -i "s|vllm-project/tpu-inference\.git@[a-f0-9]\+|vllm-project/tpu-inference.git@${TPU_INFERENCE_COMMIT}|g" requirements/special_requirements.txt
+          fi
+
+      - name: Create Pull Request
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+
+          if git diff --exit-code; then
+            echo "No changes to commit."
+            exit 0
+          fi
+
+          BRANCH_NAME="update-lkg-commits-$(date +%s)"
+          git checkout -b "$BRANCH_NAME"
+          git add requirements/requirements.txt requirements/special_requirements.txt
+          git commit -m "chore: update vLLM and tpu-inference LKG commits"
+          git push origin "$BRANCH_NAME"
+
+          gh pr create \
+            --title "Update vLLM and tpu-inference LKG commits" \
+            --body "This PR updates the vLLM and tpu-inference pinned commits to the latest versions that passed the Tunix Nightly Regression tests." \
+            --base main \
+            --head "$BRANCH_NAME"

From e027f1465efe0a770106e5477e11ac502c075a9d Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 21:28:23 +0000
Subject: [PATCH 2/3] ci: add automated LKG capture to nightly regression tests

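Adds a `run_latest` job to the nightly regression workflow and makes `update_lkg_pr` depend on it. `run_latest` reads the last-known-good (LKG) `vllm` commit from `.buildkite/vllm_lkg.version` on the `main` branch of `tpu-inference` and the HEAD commit of `tpu-inference`'s `main` branch, pins both in the requirements files, and runs the tests; `update_lkg_pr` then proposes the new pins via PR. The commit-extraction step and the job outputs move from `run_prod` to `run_latest`; the test steps of `run_prod` are unchanged.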

Co-authored-by: sizhit2 <32147610+sizhit2@users.noreply.github.com>
---
 .github/workflows/tpu-nightly-regression.yml | 250 +++++++++++++++++--
 1 file changed, 225 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/tpu-nightly-regression.yml b/.github/workflows/tpu-nightly-regression.yml
index 63f611e24..680434945 100644
--- a/.github/workflows/tpu-nightly-regression.yml
+++ b/.github/workflows/tpu-nightly-regression.yml
@@ -35,9 +35,6 @@ env:
 
 jobs:
   run_prod:
-    outputs:
-      vllm_commit: ${{ steps.extract_commits.outputs.vllm_commit }}
-      tpu_inference_commit: ${{ steps.extract_commits.outputs.tpu_inference_commit }}
     runs-on: [linux-x86-ct5lp-224-8tpu]
     environment: testing
     container:
@@ -223,34 +220,64 @@ jobs:
         fi
         echo "🎉 All RL scripts completed successfully."
 
+  run_latest:
+    outputs:
+      vllm_commit: ${{ steps.extract_commits.outputs.vllm_commit }}
+      tpu_inference_commit: ${{ steps.extract_commits.outputs.tpu_inference_commit }}
+    runs-on: [linux-x86-ct5lp-224-8tpu]
+    environment: testing
+    container:
+      image: vllm/vllm-tpu:nightly
+      options: --privileged
+      env:
+        CLOUD_TPU_ACCELERATOR: v5e-8
+        JAX_PLATFORMS: tpu,cpu
+    steps:
+
+    - name: Checkout code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+
+    # Cache Hugging Face hub (runs after checkout so hashFiles() in the key can see the repository files)
+    - name: Cache HF hub
+      uses: actions/cache@v4
+      with:
+        path: ~/.cache/huggingface
+        key: hf-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'requirements*.txt', 'constraints*.txt') }}
+        restore-keys: |
+          hf-${{ runner.os }}-
+
     - name: Extract dependency commits
       id: extract_commits
       run: |
         cat << 'EOF' > get_commits.py
-        import sys
+        import urllib.request
+        import json
 
-        try:
-            from importlib.metadata import requires, version
-        except ImportError:
-            import pkg_resources
+        def get_head_commit(repo):
+            url = f"https://api.github.com/repos/{repo}/commits/main"
+            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+            try:
+                with urllib.request.urlopen(req) as response:
+                    data = json.loads(response.read().decode())
+                    return data['sha']
+            except Exception as e:
+                print(f"Error fetching {repo}: {e}")
+                return None
 
-        def get_commit(package_name):
+        def get_vllm_lkg_commit():
+            url = "https://raw.githubusercontent.com/vllm-project/tpu-inference/main/.buildkite/vllm_lkg.version"
+            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
             try:
-                import importlib.metadata
-                dist = importlib.metadata.distribution(package_name)
-                # the dist.read_text('direct_url.json') usually contains the exact git commit for git dependencies
-                import json
-                direct_url = dist.read_text('direct_url.json')
-                if direct_url:
-                    data = json.loads(direct_url)
-                    if 'vcs_info' in data and 'commit_id' in data['vcs_info']:
-                        return data['vcs_info']['commit_id']
+                with urllib.request.urlopen(req) as response:
+                    return response.read().decode().strip()
             except Exception as e:
-                pass
-            return None
+                print(f"Error fetching vllm lkg: {e}")
+                return None
 
-        vllm_commit = get_commit("vllm")
-        tpu_inference_commit = get_commit("tpu-inference")
+        vllm_commit = get_vllm_lkg_commit()
+        tpu_inference_commit = get_head_commit("vllm-project/tpu-inference")
 
         with open(r"${{ github.output }}", "a") as f:
             if vllm_commit:
@@ -261,8 +288,181 @@ jobs:
         sed -i "s|\${{ github.output }}|$GITHUB_OUTPUT|g" get_commits.py
         python3 get_commits.py
 
+    - name: Install tunix dependencies
+      run: |
+        # Update requirement files with specific commits
+        VLLM_COMMIT="${{ steps.extract_commits.outputs.vllm_commit }}"
+        TPU_INFERENCE_COMMIT="${{ steps.extract_commits.outputs.tpu_inference_commit }}"
+
+        if [ -n "$VLLM_COMMIT" ]; then
+          sed -i "s|vllm-project/vllm\.git@[a-f0-9]\+|vllm-project/vllm.git@${VLLM_COMMIT}|g" requirements/requirements.txt
+        fi
+
+        if [ -n "$TPU_INFERENCE_COMMIT" ]; then
+          sed -i "s|vllm-project/tpu-inference\.git@[a-f0-9]\+|vllm-project/tpu-inference.git@${TPU_INFERENCE_COMMIT}|g" requirements/special_requirements.txt
+        fi
+
+        pip install -e .[prod,test]
+
+    - name: Verify TPU availability
+      run: |
+        python -c "
+        import jax
+        print(f'JAX version: {jax.__version__}')
+        print(f'JAX devices: {jax.devices()}')
+
+        # Check if we have TPU devices specifically
+        devices = jax.devices()
+        has_tpu = len(devices) > 0 and all(device.platform == 'tpu' for device in devices)
+        print(f'TPU available: {has_tpu}')
+
+        if not has_tpu:
+            print('ERROR: No TPU devices found! Expected TPU devices but got:', [device.platform for device in devices])
+            exit(1)
+        else:
+            print(f'SUCCESS: Found {len(devices)} TPU device(s)')
+        "
+
+    - name: Run regression scripts
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+      id: regression_tests
+      run: |
+        # Download GSM8K dataset
+        mkdir -p /tmp/grpo_test/rl/grpo/data
+
+        FAILED=0
+        echo "📦 Executing: examples/deepscaler/math_eval_nb.py..."
+        python examples/deepscaler/math_eval_nb.py || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in colocated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-3-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in colocated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-3-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-3-way || FAILED=1
+
+        # SGLang Tests
+        unset JAX_PLATFORMS
+        pip list | egrep 'jax|flax|libtpu'
+        cd ..
+        git clone https://github.com/sgl-project/sglang-jax.git && cd sglang-jax/python && pip install -e . && cd ../..
+        pip install jax==0.8.1 flax==0.12.0 libtpu==0.0.24
+        pip list | egrep 'jax|flax|libtpu'
+        cd tunix
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in colocated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-3-way || FAILED=1
+
+        # echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax with LoRA ..."
+        # python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine sglang_jax --enable-lora --lora-target-modules all || FAILED=1
+
+
+        if [ "$FAILED" -ne 0 ]; then
+          echo "One or more scripts failed!"
+          exit 1
+        fi
+
+    - name: Run SFT shell scripts
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+      run: |
+        SCRIPT_DIR="./tunix/examples/sft/mtnt"
+        MAX_STEPS=5
+        EVAL_EVERY_N_STEPS=1
+
+        # Check if directory exists
+        if [ ! -d "$SCRIPT_DIR" ]; then
+          echo "Directory $SCRIPT_DIR does not exist"
+          exit 1
+        fi
+
+        echo "🔍 Finding scripts in $SCRIPT_DIR"
+        for script in "$SCRIPT_DIR"/*.sh; do
+          if [ -f "$script" ]; then
+            echo "📦 Executing: $script"
+            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
+            chmod +x "$script"
+            if bash "$script" \
+                --training_config.max_steps "$MAX_STEPS" \
+                --training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
+              echo "✅ Successfully completed: $script"
+            else
+              exit_code=$?
+              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
+              exit "$exit_code"
+            fi
+          fi
+        done
+        echo "🎉 All SFT scripts completed successfully."
+
+    - name: Run RL shell scripts
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+      run: |
+        SCRIPT_DIR="./tunix/examples/rl/grpo/gsm8k"
+        EXCLUDE_DIR="verl_compatible"
+        MAX_STEPS=5
+        EVAL_EVERY_N_STEPS=1
+
+        if [ ! -d "$SCRIPT_DIR" ]; then
+          echo "Directory $SCRIPT_DIR does not exist" >&2
+          exit 1
+        fi
+
+        echo "🔍 Finding scripts in $SCRIPT_DIR, excluding $EXCLUDE_DIR"
+        final_exit_code=0
+
+        while IFS= read -r script; do
+          if [ -f "$script" ]; then
+            echo "📦 Executing: $script"
+            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
+            chmod +x "$script"
+            if ! bash "$script" \
+                --rl_training_config.max_steps "$MAX_STEPS" \
+                --rl_training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
+              exit_code=$?
+              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
+              final_exit_code=$exit_code
+              # Stop processing further scripts after the first failure
+              break
+            else
+              echo "✅ Successfully completed: $script"
+            fi
+          fi
+        done < <(find "$SCRIPT_DIR" -name "*.sh" -type f | grep -v "$SCRIPT_DIR/$EXCLUDE_DIR/")
+
+        if [ "$final_exit_code" -ne 0 ]; then
+          echo "🚫 One or more RL scripts failed. Exiting with code $final_exit_code." >&2
+          exit "$final_exit_code"
+        fi
+        echo "🎉 All RL scripts completed successfully."
+
   update_lkg_pr:
-    needs: run_prod
+    needs: run_latest
     runs-on: ubuntu-latest
     permissions:
       contents: write
@@ -273,8 +473,8 @@ jobs:
 
       - name: Update requirement files
         run: |
-          VLLM_COMMIT="${{ needs.run_prod.outputs.vllm_commit }}"
-          TPU_INFERENCE_COMMIT="${{ needs.run_prod.outputs.tpu_inference_commit }}"
+          VLLM_COMMIT="${{ needs.run_latest.outputs.vllm_commit }}"
+          TPU_INFERENCE_COMMIT="${{ needs.run_latest.outputs.tpu_inference_commit }}"
 
           if [ -n "$VLLM_COMMIT" ]; then
             sed -i "s|vllm-project/vllm\.git@[a-f0-9]\+|vllm-project/vllm.git@${VLLM_COMMIT}|g" requirements/requirements.txt

From de62d11f2f7732286218a48d2b665dec1a04ce49 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 22:54:17 +0000
Subject: [PATCH 3/3] ci: automate LKG commits generation for vllm and
 tpu-inference

Moves test steps into a composite action and adds a job to update LKGs with git ls-remote and buildkite curl endpoint.

Co-authored-by: sizhit2 <32147610+sizhit2@users.noreply.github.com>
---
 .../actions/run_regression_tests/action.yml   | 150 +++++++++
 .github/workflows/tpu-nightly-regression.yml  | 300 +-----------------
 2 files changed, 166 insertions(+), 284 deletions(-)
 create mode 100644 .github/actions/run_regression_tests/action.yml

diff --git a/.github/actions/run_regression_tests/action.yml b/.github/actions/run_regression_tests/action.yml
new file mode 100644
index 000000000..7dacd03a1
--- /dev/null
+++ b/.github/actions/run_regression_tests/action.yml
@@ -0,0 +1,150 @@
+name: "Run Regression Tests"
+description: "Runs Tunix regression tests, SFT shell scripts, and RL shell scripts."
+inputs:
+  hf_token:
+    description: "HuggingFace token for model downloads"
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Run regression scripts
+      env:
+        HF_TOKEN: ${{ inputs.hf_token }}
+      id: regression_tests
+      shell: bash
+      run: |
+        # Pre-create the data directory under the /tmp/grpo_test root passed to the GRPO scripts
+        mkdir -p /tmp/grpo_test/rl/grpo/data
+
+        FAILED=0  # set to 1 by any failing script below; checked once at the end of the step
+        echo "📦 Executing: examples/deepscaler/math_eval_nb.py..."
+        python examples/deepscaler/math_eval_nb.py || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in colocated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-3-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in colocated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-3-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-3-way || FAILED=1
+
+        # SGLang Tests
+        unset JAX_PLATFORMS
+        pip list | grep -E 'jax|flax|libtpu'
+        cd ..
+        git clone https://github.com/sgl-project/sglang-jax.git && cd sglang-jax/python && pip install -e . && cd ../..
+        pip install jax==0.8.1 flax==0.12.0 libtpu==0.0.24
+        pip list | grep -E 'jax|flax|libtpu'
+        cd tunix
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in colocated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 2 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-2-way || FAILED=1
+
+        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 3 way disaggregated mode ..."
+        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-3-way || FAILED=1
+
+        # echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax with LoRA ..."
+        # python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine sglang_jax --enable-lora --lora-target-modules all || FAILED=1
+
+
+        if [ "$FAILED" -ne 0 ]; then
+          echo "One or more scripts failed!"
+          exit 1
+        fi
+
+    - name: Run SFT shell scripts
+      env:
+        HF_TOKEN: ${{ inputs.hf_token }}
+      shell: bash
+      run: |
+        SCRIPT_DIR="./tunix/examples/sft/mtnt"
+        MAX_STEPS=5  # forwarded to each script as --training_config.max_steps
+        EVAL_EVERY_N_STEPS=1  # forwarded to each script as --training_config.eval_every_n_steps
+
+        # Fail the step right away when the script directory is missing
+        if [ ! -d "$SCRIPT_DIR" ]; then
+          echo "Directory $SCRIPT_DIR does not exist"
+          exit 1
+        fi
+
+        echo "🔍 Finding scripts in $SCRIPT_DIR"
+        for script in "$SCRIPT_DIR"/*.sh; do
+          if [ -f "$script" ]; then  # also skips the unexpanded pattern when no *.sh file matches
+            echo "📦 Executing: $script"
+            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
+            chmod +x "$script"
+            if bash "$script" \
+                --training_config.max_steps "$MAX_STEPS" \
+                --training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
+              echo "✅ Successfully completed: $script"
+            else
+              exit_code=$?  # exit status of the script that just failed
+              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
+              exit "$exit_code"
+            fi
+          fi
+        done
+        echo "🎉 All SFT scripts completed successfully."
+
+    - name: Run RL shell scripts
+      env:
+        HF_TOKEN: ${{ inputs.hf_token }}
+      shell: bash
+      run: |
+        SCRIPT_DIR="./tunix/examples/rl/grpo/gsm8k"
+        EXCLUDE_DIR="verl_compatible"
+        MAX_STEPS=5  # forwarded to each script as --rl_training_config.max_steps
+        EVAL_EVERY_N_STEPS=1  # forwarded to each script as --rl_training_config.eval_every_n_steps
+
+        if [ ! -d "$SCRIPT_DIR" ]; then
+          echo "Directory $SCRIPT_DIR does not exist" >&2
+          exit 1
+        fi
+
+        echo "🔍 Finding scripts in $SCRIPT_DIR, excluding $EXCLUDE_DIR"
+        final_exit_code=0
+        # Run every *.sh under SCRIPT_DIR except those below EXCLUDE_DIR; the first failure ends the loop.
+        while IFS= read -r script; do
+          if [ -f "$script" ]; then
+            echo "📦 Executing: $script"
+            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
+            chmod +x "$script"
+            if bash "$script" \
+                --rl_training_config.max_steps "$MAX_STEPS" \
+                --rl_training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
+              echo "✅ Successfully completed: $script"
+            else
+              exit_code=$?  # real script status; with `if ! cmd; then` this was always 0 and failures were lost
+              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
+              final_exit_code=$exit_code
+              # Stop processing further scripts after the first failure
+              break
+            fi
+          fi
+        done < <(find "$SCRIPT_DIR" -name "*.sh" -type f | grep -v "$SCRIPT_DIR/$EXCLUDE_DIR/")
+
+        if [ "$final_exit_code" -ne 0 ]; then
+          echo "🚫 One or more RL scripts failed. Exiting with code $final_exit_code." >&2
+          exit "$final_exit_code"
+        fi
+        echo "🎉 All RL scripts completed successfully."
diff --git a/.github/workflows/tpu-nightly-regression.yml b/.github/workflows/tpu-nightly-regression.yml
index 680434945..46b047827 100644
--- a/.github/workflows/tpu-nightly-regression.yml
+++ b/.github/workflows/tpu-nightly-regression.yml
@@ -82,143 +82,10 @@ jobs:
             print(f'SUCCESS: Found {len(devices)} TPU device(s)')
         "
 
-    - name: Run regression scripts
-      env:
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      id: regression_tests
-      run: |
-        # Download GSM8K dataset
-        mkdir -p /tmp/grpo_test/rl/grpo/data
-
-        FAILED=0
-        echo "📦 Executing: examples/deepscaler/math_eval_nb.py..."
-        python examples/deepscaler/math_eval_nb.py || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in colocated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-3-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in colocated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-3-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-3-way || FAILED=1
-
-        # SGLang Tests
-        unset JAX_PLATFORMS
-        pip list | egrep 'jax|flax|libtpu'
-        cd ..
-        git clone https://github.com/sgl-project/sglang-jax.git && cd sglang-jax/python && pip install -e . && cd ../..
-        pip install jax==0.8.1 flax==0.12.0 libtpu==0.0.24
-        pip list | egrep 'jax|flax|libtpu'
-        cd tunix
-        
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in colocated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-3-way || FAILED=1
-
-        # echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax with LoRA ..."
-        # python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine sglang_jax --enable-lora --lora-target-modules all || FAILED=1
-
-    
-        if [ "$FAILED" -ne 0 ]; then
-          echo "One or more scripts failed!"
-          exit 1
-        fi
-
-    - name: Run SFT shell scripts
-      env:
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      run: |
-        SCRIPT_DIR="./tunix/examples/sft/mtnt"
-        MAX_STEPS=5
-        EVAL_EVERY_N_STEPS=1
-
-        # Check if directory exists
-        if [ ! -d "$SCRIPT_DIR" ]; then
-          echo "Directory $SCRIPT_DIR does not exist"
-          exit 1
-        fi
-
-        echo "🔍 Finding scripts in $SCRIPT_DIR"
-        for script in "$SCRIPT_DIR"/*.sh; do
-          if [ -f "$script" ]; then
-            echo "📦 Executing: $script"
-            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
-            chmod +x "$script"
-            if bash "$script" \
-                --training_config.max_steps "$MAX_STEPS" \
-                --training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
-              echo "✅ Successfully completed: $script"
-            else
-              exit_code=$?
-              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
-              exit "$exit_code"
-            fi
-          fi
-        done
-        echo "🎉 All SFT scripts completed successfully."
-
-    - name: Run RL shell scripts
-      env:
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      run: |
-        SCRIPT_DIR="./tunix/examples/rl/grpo/gsm8k"
-        EXCLUDE_DIR="verl_compatible"
-        MAX_STEPS=5
-        EVAL_EVERY_N_STEPS=1
-
-        if [ ! -d "$SCRIPT_DIR" ]; then
-          echo "Directory $SCRIPT_DIR does not exist" >&2
-          exit 1
-        fi
-
-        echo "🔍 Finding scripts in $SCRIPT_DIR, excluding $EXCLUDE_DIR"
-        final_exit_code=0
-
-        while IFS= read -r script; do
-          if [ -f "$script" ]; then
-            echo "📦 Executing: $script"
-            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
-            chmod +x "$script"
-            if ! bash "$script" \
-                --rl_training_config.max_steps "$MAX_STEPS" \
-                --rl_training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
-              exit_code=$?
-              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
-              final_exit_code=$exit_code
-              # Stop processing further scripts after the first failure
-              break
-            else
-              echo "✅ Successfully completed: $script"
-            fi
-          fi
-        done < <(find "$SCRIPT_DIR" -name "*.sh" -type f | grep -v "$SCRIPT_DIR/$EXCLUDE_DIR/")
-
-        if [ "$final_exit_code" -ne 0 ]; then
-          echo "🚫 One or more RL scripts failed. Exiting with code $final_exit_code." >&2
-          exit "$final_exit_code"
-        fi
-        echo "🎉 All RL scripts completed successfully."
+    - name: Run regression tests (shared)
+      uses: ./.github/actions/run_regression_tests
+      with:
+        hf_token: ${{ secrets.HF_TOKEN }}
 
   run_latest:
     outputs:
@@ -254,16 +121,15 @@ jobs:
         cat << 'EOF' > get_commits.py
         import urllib.request
         import json
+        import os
+        import subprocess
 
-        def get_head_commit(repo):
-            url = f"https://api.github.com/repos/{repo}/commits/main"
-            req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+        def get_head_commit(repo_url):  # Returns the SHA that the remote's HEAD resolves to, or None on any failure.
             try:
-                with urllib.request.urlopen(req) as response:
-                    data = json.loads(response.read().decode())
-                    return data['sha']
+                result = subprocess.run(["git", "ls-remote", repo_url, "HEAD"], capture_output=True, text=True, check=True, timeout=60)  # timeout bounds the network call; TimeoutExpired falls into the except below
+                return result.stdout.split()[0]  # output is "<sha>\tHEAD"; empty output raises IndexError, handled below
             except Exception as e:
-                print(f"Error fetching {repo}: {e}")
+                print(f"Error fetching {repo_url}: {e}")  # best-effort: report and fall through to None
                 return None
 
         def get_vllm_lkg_commit():
@@ -277,15 +143,14 @@ jobs:
                 return None
 
         vllm_commit = get_vllm_lkg_commit()
-        tpu_inference_commit = get_head_commit("vllm-project/tpu-inference")
+        tpu_inference_commit = get_head_commit("https://github.com/vllm-project/tpu-inference.git")  # resolves the remote's HEAD; the replaced code queried the "main" ref explicitly
 
-        with open(r"${{ github.output }}", "a") as f:
+        with open(os.environ["GITHUB_OUTPUT"], "a") as f:  # append to the step-output file named by $GITHUB_OUTPUT; a commit that could not be resolved is not written
             if vllm_commit:
                 f.write(f"vllm_commit={vllm_commit}\n")
             if tpu_inference_commit:
                 f.write(f"tpu_inference_commit={tpu_inference_commit}\n")
         EOF
-        sed -i "s|\${{ github.output }}|$GITHUB_OUTPUT|g" get_commits.py
         python3 get_commits.py
 
     - name: Install tunix dependencies
@@ -323,143 +188,10 @@ jobs:
             print(f'SUCCESS: Found {len(devices)} TPU device(s)')
         "
 
-    - name: Run regression scripts
-      env:
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      id: regression_tests
-      run: |
-        # Download GSM8K dataset
-        mkdir -p /tmp/grpo_test/rl/grpo/data
-
-        FAILED=0
-        echo "📦 Executing: examples/deepscaler/math_eval_nb.py..."
-        python examples/deepscaler/math_eval_nb.py || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in colocated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vanilla rollout engine in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --cluster-setup=disaggregated-3-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in colocated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm rollout engine in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --cluster-setup=disaggregated-3-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with vllm server mode in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test  --num-batches=20 --rollout-engine=vllm --rollout-server-mode=True --cluster-setup=disaggregated-3-way || FAILED=1
-
-        # SGLang Tests
-        unset JAX_PLATFORMS
-        pip list | egrep 'jax|flax|libtpu'
-        cd ..
-        git clone https://github.com/sgl-project/sglang-jax.git && cd sglang-jax/python && pip install -e . && cd ../..
-        pip install jax==0.8.1 flax==0.12.0 libtpu==0.0.24
-        pip list | egrep 'jax|flax|libtpu'
-        cd tunix
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in colocated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 2 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-2-way || FAILED=1
-
-        echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax in 3 way disaggregated mode ..."
-        python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine=sglang_jax --cluster-setup=disaggregated-3-way || FAILED=1
-
-        # echo "📦 Executing: scripts/grpo_demo_llama3_qwen2.py with sglang_jax with LoRA ..."
-        # python scripts/grpo_demo_llama3_qwen2.py --root-dir=/tmp/grpo_test --num-batches=20 --rollout-engine sglang_jax --enable-lora --lora-target-modules all || FAILED=1
-
-
-        if [ "$FAILED" -ne 0 ]; then
-          echo "One or more scripts failed!"
-          exit 1
-        fi
-
-    - name: Run SFT shell scripts
-      env:
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      run: |
-        SCRIPT_DIR="./tunix/examples/sft/mtnt"
-        MAX_STEPS=5
-        EVAL_EVERY_N_STEPS=1
-
-        # Check if directory exists
-        if [ ! -d "$SCRIPT_DIR" ]; then
-          echo "Directory $SCRIPT_DIR does not exist"
-          exit 1
-        fi
-
-        echo "🔍 Finding scripts in $SCRIPT_DIR"
-        for script in "$SCRIPT_DIR"/*.sh; do
-          if [ -f "$script" ]; then
-            echo "📦 Executing: $script"
-            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
-            chmod +x "$script"
-            if bash "$script" \
-                --training_config.max_steps "$MAX_STEPS" \
-                --training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
-              echo "✅ Successfully completed: $script"
-            else
-              exit_code=$?
-              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
-              exit "$exit_code"
-            fi
-          fi
-        done
-        echo "🎉 All SFT scripts completed successfully."
-
-    - name: Run RL shell scripts
-      env:
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-      run: |
-        SCRIPT_DIR="./tunix/examples/rl/grpo/gsm8k"
-        EXCLUDE_DIR="verl_compatible"
-        MAX_STEPS=5
-        EVAL_EVERY_N_STEPS=1
-
-        if [ ! -d "$SCRIPT_DIR" ]; then
-          echo "Directory $SCRIPT_DIR does not exist" >&2
-          exit 1
-        fi
-
-        echo "🔍 Finding scripts in $SCRIPT_DIR, excluding $EXCLUDE_DIR"
-        final_exit_code=0
-
-        while IFS= read -r script; do
-          if [ -f "$script" ]; then
-            echo "📦 Executing: $script"
-            echo "MAX_STEPS=$MAX_STEPS, EVAL_EVERY_N_STEPS=$EVAL_EVERY_N_STEPS"
-            chmod +x "$script"
-            if ! bash "$script" \
-                --rl_training_config.max_steps "$MAX_STEPS" \
-                --rl_training_config.eval_every_n_steps "$EVAL_EVERY_N_STEPS"; then
-              exit_code=$?
-              echo "❌ Failed to complete: $script (Exit Code: $exit_code)" >&2
-              final_exit_code=$exit_code
-              # Stop processing further scripts after the first failure
-              break
-            else
-              echo "✅ Successfully completed: $script"
-            fi
-          fi
-        done < <(find "$SCRIPT_DIR" -name "*.sh" -type f | grep -v "$SCRIPT_DIR/$EXCLUDE_DIR/")
-
-        if [ "$final_exit_code" -ne 0 ]; then
-          echo "🚫 One or more RL scripts failed. Exiting with code $final_exit_code." >&2
-          exit "$final_exit_code"
-        fi
-        echo "🎉 All RL scripts completed successfully."
+    - name: Run regression tests (shared)  # replaces the inline "Run regression scripts", "Run SFT shell scripts" and "Run RL shell scripts" steps
+      uses: ./.github/actions/run_regression_tests  # repo-local action; the path is resolved inside the checked-out workspace
+      with:
+        hf_token: ${{ secrets.HF_TOKEN }}  # passed as an action input; the deleted steps exposed it as env HF_TOKEN instead
 
   update_lkg_pr:
     needs: run_latest