diff --git a/training/a4x/llama3-1-405b/nemo-pretraining-gke/16node-FP8CS-GBS2048/recipe/README.md b/training/a4x/llama3-1-405b/nemo-pretraining-gke/16node-FP8CS-GBS2048/recipe/README.md index 289f6b1..9b2701d 100644 --- a/training/a4x/llama3-1-405b/nemo-pretraining-gke/16node-FP8CS-GBS2048/recipe/README.md +++ b/training/a4x/llama3-1-405b/nemo-pretraining-gke/16node-FP8CS-GBS2048/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-405b +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-405b/nemo-pretraining-gke/16node-FP8CS-GBS2048/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-1-405b/nemo-pretraining-gke/32node-BF16-GBS64/recipe/README.md b/training/a4x/llama3-1-405b/nemo-pretraining-gke/32node-BF16-GBS64/recipe/README.md index 28f525f..0733b2e 100644 --- a/training/a4x/llama3-1-405b/nemo-pretraining-gke/32node-BF16-GBS64/recipe/README.md +++ b/training/a4x/llama3-1-405b/nemo-pretraining-gke/32node-BF16-GBS64/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. 
git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-405b/nemo-pretraining-gke/32_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-405b/nemo-pretraining-gke/32node-BF16-GBS64/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-1-70b/nemo-pretraining-gke/16node-BF16-GBS2048/recipe/README.md b/training/a4x/llama3-1-70b/nemo-pretraining-gke/16node-BF16-GBS2048/recipe/README.md index b402f3f..e133660 100644 --- a/training/a4x/llama3-1-70b/nemo-pretraining-gke/16node-BF16-GBS2048/recipe/README.md +++ b/training/a4x/llama3-1-70b/nemo-pretraining-gke/16node-BF16-GBS2048/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-70b/nemo-pretraining-gke/16_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-70b/nemo-pretraining-gke/16node-BF16-GBS2048/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-1-70b/nemo-pretraining-gke/32node-FP8CS-GBS2048/recipe/README.md b/training/a4x/llama3-1-70b/nemo-pretraining-gke/32node-FP8CS-GBS2048/recipe/README.md index ee39cd6..ca5d0d8 100644 --- a/training/a4x/llama3-1-70b/nemo-pretraining-gke/32node-FP8CS-GBS2048/recipe/README.md +++ b/training/a4x/llama3-1-70b/nemo-pretraining-gke/32node-FP8CS-GBS2048/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. 
git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-70b/nemo-pretraining-gke/32_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-70b/nemo-pretraining-gke/32node-FP8CS-GBS2048/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-1-70b/nemo-pretraining-gke/64node-FP8CS-GBS2048/recipe/README.md b/training/a4x/llama3-1-70b/nemo-pretraining-gke/64node-FP8CS-GBS2048/recipe/README.md index ec95a07..139514a 100644 --- a/training/a4x/llama3-1-70b/nemo-pretraining-gke/64node-FP8CS-GBS2048/recipe/README.md +++ b/training/a4x/llama3-1-70b/nemo-pretraining-gke/64node-FP8CS-GBS2048/recipe/README.md @@ -67,11 +67,11 @@ Set the default project: Clone the `gpu-recipes` repository and set a reference to the recipe folder. -``` +``` git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-70b/nemo-pretraining-gke/64_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-70b/nemo-pretraining-gke/64node-FP8CS-GBS2048/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-1-8b/nemo-pretraining-gke/16node-BF16-GBS1024/recipe/README.md b/training/a4x/llama3-1-8b/nemo-pretraining-gke/16node-BF16-GBS1024/recipe/README.md index ed347fb..58d1e93 100644 --- a/training/a4x/llama3-1-8b/nemo-pretraining-gke/16node-BF16-GBS1024/recipe/README.md +++ b/training/a4x/llama3-1-8b/nemo-pretraining-gke/16node-BF16-GBS1024/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. 
git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-8b/nemo-pretraining-gke/16_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-1-8b/nemo-pretraining-gke/16node-BF16-GBS1024/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/Chart.yaml b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/Chart.yaml new file mode 100644 index 0000000..af46c11 --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/Chart.yaml @@ -0,0 +1,20 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v2 +name: a4_jobset_workload +description: a4_jobset_workload +type: application +version: 0.1.0 +appVersion: "1.16.0" diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/README.md b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/README.md new file mode 100644 index 0000000..b7e3725 --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/README.md @@ -0,0 +1,151 @@ + +# Pretrain llama3-8b workloads on a4x GKE Node pools with Nvidia Megatron-Bridge Framework + +This recipe outlines the steps for running a llama3-8b pretraining +workload on [a4x GKE Node pools](https://cloud.google.com/kubernetes-engine) by using the +[NVIDIA Megatron-Bridge framework](https://github.com/NVIDIA-NeMo/Megatron-Bridge). + +## Orchestration and deployment tools + +For this recipe, the following setup is used: + +- Orchestration - [Google Kubernetes Engine (GKE)](https://cloud.google.com/kubernetes-engine) +- Pretraining job configuration and deployment - A Helm chart is used to + configure and deploy the [Kubernetes Jobset](https://kubernetes.io/blog/2025/03/23/introducing-jobset) resource which manages the execution of the + [Megatron-Bridge pretraining workload](https://github.com/NVIDIA-NeMo/Megatron-Bridge). + +## Test environment + +This recipe has been optimized for and tested with the following configuration: + +- GKE cluster +Please follow Cluster Toolkit [instructions](https://github.com/GoogleCloudPlatform/cluster-toolkit/tree/main/examples/gke-a4x) +to create your a4x GKE cluster. + +## Training dataset + +This recipe uses a mock pretraining dataset provided by the Megatron-Bridge framework. 
+ +## Docker container image + +This recipe uses the following docker images: + +- `nvcr.io/nvidia/nemo:25.11` +- `us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib-arm64:v1.1.0` + +## Run the recipe + +From your client workstation, complete the following steps: + +### Configure environment settings + +Set the environment variables to match your environment: + + ```bash + export PROJECT_ID= + export CLUSTER_REGION= + export CLUSTER_NAME= + export GCS_BUCKET= # Note: path should not be prefixed with gs:// + export KUEUE_NAME= + ``` + +Replace the following values: + + - ``: your Google Cloud project ID. + - ``: the region where your cluster is located. + - ``: the name of your GKE cluster. + - ``: the name of your Cloud Storage bucket. Don't include the `gs://` prefix. + - ``: the name of the Kueue local queue. The default queue created by the cluster toolkit is `a4x`. Make sure to verify the name of the local queue in your cluster. + +Set the default project: + + ```bash + gcloud config set project $PROJECT_ID + ``` + +### Get the recipe + +Clone the `gpu-recipes` repository and set a reference to the recipe folder. + +``` +git clone https://github.com/ai-hypercomputer/gpu-recipes.git +cd gpu-recipes +export REPO_ROOT=`git rev-parse --show-toplevel` +export RECIPE_ROOT=$REPO_ROOT/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe +cd $RECIPE_ROOT +``` + +### Get cluster credentials + +``` +gcloud container clusters get-credentials $CLUSTER_NAME --region $CLUSTER_REGION +``` + +### Configure and submit a pretraining job + +#### Using 2 node (8 gpus) fp8-cs precision +To execute the job with the default settings, run the following command from +your client: + +```bash +cd $RECIPE_ROOT +export WORKLOAD_NAME=$USER-a4x-llama3-8b-2node +helm install $WORKLOAD_NAME . 
-f values.yaml \ +--set-file workload_launcher=launcher.sh \ +--set workload.image=nvcr.io/nvidia/nemo:25.11 \ +--set volumes.gcsMounts[0].bucketName=${GCS_BUCKET} \ +--set volumes.gcsMounts[0].mountPath=/job-logs \ +--set workload.envs[0].value=/job-logs/$WORKLOAD_NAME \ +--set queue=${KUEUE_NAME} +``` + +**Examples** + +- To set the number of training steps to 100, run the following command from + your client: + + ```bash + cd $RECIPE_ROOT + export WORKLOAD_NAME=$USER-a4x-llama3-8b-2node + helm install $WORKLOAD_NAME . -f values.yaml \ + --set-file workload_launcher=launcher.sh \ + --set workload.image=nvcr.io/nvidia/nemo:25.11 \ + --set volumes.gcsMounts[0].bucketName=${GCS_BUCKET} \ + --set volumes.gcsMounts[0].mountPath=/job-logs \ + --set workload.envs[0].value=/job-logs/$WORKLOAD_NAME \ + --set queue=${KUEUE_NAME} \ + --set workload.arguments[0]="trainer.max_steps=100" + ``` + +### Monitor the job + +To check the status of pods in your job, run the following command: + +``` +kubectl get pods | grep $USER-a4x-llama3-8b-2node +``` + +Replace the following: + +- JOB_NAME_PREFIX - your job name prefix. For example $USER-a4x-llama3-8b-2node. + +To get the logs for one of the pods, run the following command: + +``` +kubectl logs POD_NAME +``` + +Information about the training job's progress, including crucial details such as +loss, step count, and step time, is generated by the rank 0 process. +This process runs on the pod whose name begins with +`JOB_NAME_PREFIX-workload-0-0`. +For example: `$USER-a4x-llama3-8b-2node-workload-0-0-s9zrv`. + +### Uninstall the Helm release + +You can delete the job and other resources created by the Helm chart. 
To +uninstall Helm, run the following command from your client: + +```bash +helm uninstall $USER-a4x-llama3-8b-2node +``` \ No newline at end of file diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/launcher.sh b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/launcher.sh new file mode 100644 index 0000000..39d2db4 --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/launcher.sh @@ -0,0 +1,135 @@ +usage() +{ +cat << EOF +usage: bash ./launcher.sh [config-override [config-override ...]] +config-override (Optional) A NeMo configuration override. E.g. trainer.max_steps=10000. +EOF +} + +parse_args() { + while [ "$1" != "" ]; do + case $(grep -o "=" <<< "$1" | wc -l) in + 1 ) + config_overrides+=("$1") + ;; + * ) + echo "Invalid config override: $1" + usage + exit 1 + esac + shift + done + config_overrides="${config_overrides[*]}" +} + +config_overrides=() +parse_args "$@" + +if [ -z "${config_overrides}" ]; then + echo "No NeMo config overrides specified" +else + echo "NeMo config overrides:" + echo " ${config_overrides}" +fi + +export LD_LIBRARY_PATH="$NCCL_PLUGIN_PATH" +ldconfig $LD_LIBRARY_PATH +echo "Added $LD_LIBRARY_PATH to ldconfig:" +ldconfig -p | grep libcuda | sed 's/^/ /' +echo "" + +if [[ -n "${EXPLICIT_LOG_DIR}" ]]; then + explicit_log_dir=${EXPLICIT_LOG_DIR} +else + explicit_log_dir=workload_logs +fi +echo "Logging to ${explicit_log_dir}" + +if [[ -n "${TOKENIZER_PATH}" ]]; then + echo "Getting tokenizer files" + cp ${TOKENIZER_PATH}/* . + echo "" +fi + +echo "Launching Torch distributed on the node rank $JOB_COMPLETION_INDEX out of $NNODES nodes" + +pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger + +# Create the nsys directory. 
+mkdir -p ${explicit_log_dir}/nsys + + +cd /opt +rm -rf Megatron-Bridge +git clone https://github.com/NVIDIA-NeMo/Megatron-Bridge.git +cd Megatron-Bridge +git checkout 7695d4acbfac19353d20e456509117efe4733d6b +ls + + +worker_command=$(cat <<- EOM + if [ "\$RANK" -eq "0" ]; then + echo "Worker 0 is stalling for a few seconds.." ; + sleep 3 ; + echo "The detected environment within worker rank 0 is:" ; + env | sed 's/^/ /' ; + fi ; + + cd /opt/Megatron-Bridge ; + + numactl \ + --cpunodebind=\$((LOCAL_RANK/2)) \ + --membind=\$((LOCAL_RANK/2)) nsys profile \ + -t nvtx,cuda \ + --cuda-event-trace=false \ + --sample=none \ + --capture-range=cudaProfilerApi \ + --capture-range-end=stop \ + --kill none \ + -o ${explicit_log_dir}/$JOB_IDENTIFIER/rank-\$RANK \ + --force-overwrite true \ + --session-new "nsys-\$RANDOM-\$RANK" \ + nice -10 \ + python scripts/performance/run_script.py \ + --gpu gb200 \ + --model_family_name llama \ + --model_recipe_name llama3_8b \ + --num_gpus 8 \ + --gpus_per_node 4 \ + --compute_dtype fp8_cs \ + --global_batch_size 128 \ + --micro_batch_size 4 \ + --seq_length 8192 \ + --tensor_model_parallel_size 1 \ + --pipeline_model_parallel_size 1 \ + --context_parallel_size 1 \ + --virtual_pipeline_model_parallel_size None \ + --expert_model_parallel_size 1 \ + --max_steps 50 + +EOM +) + +echo "$worker_command" > worker_command.sh +chmod 777 worker_command.sh + +torchrun \ +--nproc-per-node="4" \ +--nnodes="2" \ +--node_rank="${JOB_COMPLETION_INDEX}" \ +--rdzv_id="${JOB_IDENTIFIER}" \ +--master_addr="${MASTER_ADDR}" \ +--master_port="${MASTER_PORT}" \ +--no-python bash worker_command.sh + + +if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then + mkdir -p ${ARTIFACT_DIR} + cp -r ${explicit_log_dir}/* ${ARTIFACT_DIR}/ + env > ${ARTIFACT_DIR}/environ.txt + ls ${ARTIFACT_DIR} +fi +echo "Training completed" +echo "Pod on $(hostname --fqdn) is exiting" \ No newline at end of file diff --git 
a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/recipe_launch_command.sh b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/recipe_launch_command.sh new file mode 100644 index 0000000..f78e94b --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/recipe_launch_command.sh @@ -0,0 +1 @@ +helm install $USER-a4x-llama3-8b-2node . -f values.yaml --set-file workload_launcher=launcher.sh --set workload.image=nvcr.io/nvidia/nemo:25.11 --set volumes.gcsMounts[0].bucketName=${GCS_BUCKET} --set volumes.gcsMounts[0].mountPath=/job-logs --set workload.envs[0].value=/job-logs/$USER-a4x-llama3-8b-2node --set queue=${KUEUE_NAME} \ No newline at end of file diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-config-configmap.yaml b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-config-configmap.yaml new file mode 100644 index 0000000..a1d54ce --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-config-configmap.yaml @@ -0,0 +1,28 @@ +# yamllint disable +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +{{- if .Values.workload.configFile }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Release.Name }}-config" +data: + workload-configuration: |- +{{- if .Values.workload_config }} +{{ .Values.workload_config | nindent 4 }} +{{- else }} +{{ "config: null" | nindent 4 }} +{{- end }} +{{- end }} diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-job.yaml b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-job.yaml new file mode 100644 index 0000000..e2b6d54 --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-job.yaml @@ -0,0 +1,352 @@ +# yamllint disable +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{$timestamp := now | date "2006-01-02-15-04-05"}} +{{$jobSuffix := randAlphaNum 4 | lower}} +{{$jobuuid := uuidv4}} +{{$nodes := div .Values.workload.gpus 4 | max 1}} +{{$gpusPerNode := min .Values.workload.gpus 4}} +{{- $root := . 
-}} +apiVersion: resource.nvidia.com/v1beta1 +kind: ComputeDomain +metadata: + name: "{{ .Release.Name }}-{{ $jobSuffix }}" +spec: + numNodes: {{ $nodes }} + channel: + resourceClaimTemplate: + name: "{{ .Release.Name }}-{{ $jobSuffix }}" +--- +apiVersion: jobset.x-k8s.io/v1alpha2 +kind: JobSet +metadata: + name: "{{ .Release.Name }}" + namespace: default + labels: + {{- if $root.Values.queue }} + kueue.x-k8s.io/queue-name: "{{ $root.Values.queue }}" + {{- end }} +spec: + {{- if $root.Values.queue }} + suspend: true + {{- end }} + failurePolicy: + maxRestarts: {{ default 0 $root.Values.workload.max_workload_restarts }} + replicatedJobs: + - name: workload + replicas: 1 + template: + spec: + parallelism: {{ $nodes }} + completions: {{ $nodes }} + backoffLimit: 0 + completionMode: Indexed + activeDeadlineSeconds: 14400 # 4 hours (4 * 60 * 60) + ttlSecondsAfterFinished: 43200 # 12 hours (12 * 60 * 60) + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: workload + {{- if $root.Values.volumes.gcsVolumes }} + gke-gcsfuse/volumes: "true" + gke-gcsfuse/cpu-limit: "0" + gke-gcsfuse/memory-limit: "0" + gke-gcsfuse/ephemeral-storage-limit: "0" + {{- end }} + {{- if $root.Values.volumes.psVolumes }} + gke-parallelstore/volumes: "true" + gke-parallelstore/cpu-limit: "0" + gke-parallelstore/memory-limit: "0" + {{- end }} + {{- if and $root.Values.queue $root.Values.tasSettings.topologyRequest }} + {{- toYaml .Values.tasSettings.topologyRequest | nindent 14 }} + {{- end }} + {{- if and $root.Values.queue $root.Values.dwsSettings.maxRunDurationSeconds }} + provreq.kueue.x-k8s.io/maxRunDurationSeconds: "{{ $root.Values.dwsSettings.maxRunDurationSeconds }}" + {{- end }} + {{- if not $root.Values.network.hostNetwork }} + networking.gke.io/default-interface: "eth0" + networking.gke.io/interfaces: | + {{- if $root.Values.network.subnetworks }} + [ + {{- range $i, $subnetwork := $root.Values.network.subnetworks }} + {"interfaceName":"eth{{ $i 
}}","network":"{{ $subnetwork }}"}{{ eq $i 5 | ternary "" ","}} + {{- end }} + ] + {{- else }} + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gvnic-1"}, + {{- range $i := until 4 }} + {"interfaceName":"eth{{ add 2 $i }}","network":"rdma-{{ $i }}"}{{ eq $i 3 | ternary "" ","}} + {{- end }} + ] + {{- end }} + {{- end }} + spec: + {{- if $root.Values.network.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + {{- end }} + subdomain: "{{.Release.Name}}" + restartPolicy: Never + {{- if $root.Values.targetNodes }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: "In" + values: + {{- range $hostname := $root.Values.targetNodes }} + - {{ $hostname }} + {{- end }} + {{- end }} + {{- if $root.Values.avoidNodes }} + {{- if not $root.Values.targetNodes }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + {{- end }} + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: "NotIn" + values: + {{- range $hostname := $root.Values.avoidNodes }} + - {{ $hostname }} + {{- end }} + {{- end }} + tolerations: + - operator: "Exists" + key: nvidia.com/gpu + - operator: "Exists" + key: cloud.google.com/impending-node-termination + - key: "kubernetes.io/arch" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + + volumes: + {{ if $root.Values.network.gibVersion }} + - name: gib + emptyDir: {} + {{ end }} + + {{- if $root.Values.workload.configFile }} + - name: workload-configuration + configMap: + name: "{{.Release.Name}}-config" + items: + - key: workload-configuration + path: {{ $root.Values.workload.configFile | default "workload-configuration" }} + {{- end }} + + - name: workload-launcher + configMap: + name: "{{.Release.Name}}-launcher" + + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + + {{- range $pvc := 
$root.Values.volumes.pvcMounts }} + - name: "{{ $pvc.claimName }}" + persistentVolumeClaim: + claimName: "{{ $pvc.claimName }}" + {{- end }} + + {{- range $gcs := $root.Values.volumes.gcsMounts }} + - name: "{{ $gcs.bucketName }}" + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: "{{ $gcs.bucketName }}" + {{- if $gcs.mountOptions }} + mountOptions: "{{ $gcs.mountOptions }}" + {{- end }} + {{- end}} + + {{- if $root.Values.volumes.ssdMountPath }} + - name: local-ssd + hostPath: + path: /mnt/stateful_partition/kube-ephemeral-ssd + {{- end }} + + initContainers: + {{ if $root.Values.network.gibVersion }} + - name: nccl-plugin-installer + image: {{ $root.Values.network.gibVersion }} + imagePullPolicy: Always + args: + - | + set -ex + /scripts/container_entry.sh install --install-nccl + cp -R /var/lib/gib/lib64/. /target/usr/local/gib/lib64 + cp -R /var/lib/gib/. /target/usr/local/gib + command: + - /bin/sh + - -c + volumeMounts: + - mountPath: /target/usr/local/gib + name: gib + {{ end}} + + resourceClaims: + - name: compute-domain-channel + resourceClaimTemplateName: "{{ .Release.Name }}-{{ $jobSuffix }}" + + containers: + {{- if $root.Values.workload.gcsSidecarImage }} + - name: gke-gcsfuse-sidecar + image: {{ $root.Values.workload.gcsSidecarImage }} + - name: gke-gcsfuse-metadata-prefetch + image: {{ $root.Values.workload.gcsSidecarImage }} + {{- end }} + {{- if $root.Values.workload.psSidecarImage }} + - name: gke-parallelstore-sidecar + image: {{ $root.Values.workload.psSidecarImage }} + {{- end }} + + - name: workload + image: "{{ $root.Values.workload.image }}" + imagePullPolicy: Always + {{- if $root.Values.network.hostNetwork }} + securityContext: + privileged: true + {{- end }} + env: + - name: JOB_IDENTIFIER + value: "{{ .Release.Name }}-{{ $timestamp }}" + - name: JOB_TIMESTAMP + value: "{{ $timestamp }}" + - name: JOB_UUID + value: "{{ $jobuuid }}" + - name: JOB_ORCHESTRATOR + value: "gke" + # Add RANK based on the pod's index 
provided by the Indexed Job + # This is crucial for torch.distributed initialization. + - name: JOB_COMPLETION_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.annotations['batch.kubernetes.io/job-completion-index'] + - name: RANK_0_FQDN + value: "{{.Release.Name}}-workload-0-0.{{.Release.Name}}.default.svc.cluster.local" + - name: HOSTNAME_PREFIX + value: "{{.Release.Name}}-workload-" + - name: DOMAIN_NAME + value: "{{.Release.Name}}.default.svc.cluster.local" + - name: MASTER_ADDR + value: "{{.Release.Name}}-workload-0-0.{{.Release.Name}}.default.svc.cluster.local" + - name: MASTER_PORT + value: "6002" + - name: WORLD_SIZE + value: "{{ $root.Values.workload.gpus }}" + - name: NNODES + value: "{{ $nodes }}" + - name: GPUS_PER_NODE + value: "{{ $gpusPerNode }}" + + - name: NCCL_PLUGIN_PATH + value: /usr/local/gib/lib64 + + {{ if $root.Values.network.gibVersion }} + - name: NCCL_INIT_SCRIPT + value: "/usr/local/gib/scripts/set_nccl_env.sh" + {{ end }} + + {{ if $root.Values.network.ncclSettings }} + {{- toYaml .Values.network.ncclSettings | nindent 14 }} + {{ end }} + + {{ if $root.Values.workload.envs }} + {{- toYaml .Values.workload.envs | nindent 14 }} + {{ end }} + + command: + - bash + - -c + - | + echo "Pod on $(hostname --fqdn) is running" + echo "Pod is assigned job index of $JOB_COMPLETION_INDEX" + + if [[ -n "${NCCL_INIT_SCRIPT}" ]]; then + echo "Running NCCL init script: ${NCCL_INIT_SCRIPT}" + source ${NCCL_INIT_SCRIPT} + fi + + # Overriding NCCL_SOCKET_IFNAME definition + export NCCL_SOCKET_IFNAME="eth0,eth1" + export NCCL_TUNER_CONFIG_PATH=/usr/local/gib/configs/tuner_config_a3u.txtpb + + echo "Launching workload with the following arguments:" + {{- range $root.Values.workload.defaultArguments }} + echo " {{ . }}" + {{- end }} + {{- range $root.Values.workload.arguments }} + echo " {{ . }}" + {{- end }} + echo "" + + sleep 10 + + bash /workload/launcher/launch-workload.sh \ + {{- range $root.Values.workload.defaultArguments }} + {{ . 
}} \ + {{- end }} + {{- range $root.Values.workload.arguments }} + {{ . }} \ + {{- end }} + + + volumeMounts: + {{ if $root.Values.network.gibVersion }} + - name: gib + mountPath: /usr/local/gib + {{ end }} + + {{- if $root.Values.workload.configFile }} + - name: workload-configuration + mountPath: {{ $root.Values.workload.configPath | default "/workload/configs" }} + {{- end }} + + - name: workload-launcher + mountPath: /workload/launcher + + - name: shared-memory + mountPath: /dev/shm + + {{- range $pvc := $root.Values.volumes.pvcMounts }} + - name: "{{ $pvc.claimName }}" + mountPath: "{{ $pvc.mountPath }}" + {{- end }} + + {{- range $gcs := $root.Values.volumes.gcsMounts }} + - name: "{{ $gcs.bucketName }}" + mountPath: "{{ $gcs.mountPath }}" + {{- end }} + + {{- if $root.Values.volumes.ssdMountPath }} + - name: local-ssd + mountPath: "{{ $root.Values.volumes.ssdMountPath }}" + {{- end }} + + resources: + limits: + nvidia.com/gpu: {{ $gpusPerNode }} + claims: + - name: compute-domain-channel diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-launcher-configmap.yaml b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-launcher-configmap.yaml new file mode 100644 index 0000000..7026e0f --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-launcher-configmap.yaml @@ -0,0 +1,28 @@ +# yamllint disable +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Release.Name }}-launcher" +data: + launch-workload.sh: |- +{{- if .Values.workload_launcher }} +{{ .Values.workload_launcher | nindent 4 }} +{{- else }} + #!/bin/bash + echo "No workload launcher specified" + exit 1 +{{- end }} diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-svc.yaml b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-svc.yaml new file mode 100644 index 0000000..7cfe220 --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/templates/workload-svc.yaml @@ -0,0 +1,22 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: Service +metadata: + name: "{{ .Release.Name }}" +spec: + clusterIP: None + selector: + jobset.sigs.k8s.io/jobset-name: "{{ .Release.Name }}" diff --git a/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/values.yaml b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/values.yaml new file mode 100644 index 0000000..2727823 --- /dev/null +++ b/training/a4x/llama3-8b/megatron-bridge-pretraining-gke/2node-FP8CS-GBS128/recipe/values.yaml @@ -0,0 +1,31 @@ +dwsSettings: + maxRunDurationSeconds: null +network: + gibVersion: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib-arm64:v1.1.0 + hostNetwork: true + ncclSettings: + - name: NCCL_DEBUG + value: WARN + subnetworks[]: null +queue: null +tasSettings: + topologyRequest: + kueue.x-k8s.io/podset-preferred-topology: kubernetes.io/hostname +volumes: + gcsMounts: + - bucketName: null + mountPath: null + gcsVolumes: true + psVolumes: false +workload: + arguments[]: null + configFile: null + configPath: null + defaultArguments[]: null + envs: + - name: ARTIFACT_DIR + value: null + - name: GLOO_SOCKET_IFNAME + value: eth0 + gpus: 8 + image: nvcr.io/nvidia/nemo:25.11 diff --git a/training/a4x/nemotron4-340B/nemo-pretraining-gke/16node-FP8CS-GBS256/recipe/README.md b/training/a4x/nemotron4-340B/nemo-pretraining-gke/16node-FP8CS-GBS256/recipe/README.md index 309fae5..0c47c87 100644 --- a/training/a4x/nemotron4-340B/nemo-pretraining-gke/16node-FP8CS-GBS256/recipe/README.md +++ b/training/a4x/nemotron4-340B/nemo-pretraining-gke/16node-FP8CS-GBS256/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. 
git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/nemotron4-340b/nemo-pretraining-gke/16_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/nemotron4-340B/nemo-pretraining-gke/16node-FP8CS-GBS256/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/nemotron4-340B/nemo-pretraining-gke/32node-FP8CS-GBS256/recipe/README.md b/training/a4x/nemotron4-340B/nemo-pretraining-gke/32node-FP8CS-GBS256/recipe/README.md index 9b31d85..69d5ab0 100644 --- a/training/a4x/nemotron4-340B/nemo-pretraining-gke/32node-FP8CS-GBS256/recipe/README.md +++ b/training/a4x/nemotron4-340B/nemo-pretraining-gke/32node-FP8CS-GBS256/recipe/README.md @@ -71,7 +71,7 @@ Clone the `gpu-recipes` repository and set a reference to the recipe folder. git clone https://github.com/ai-hypercomputer/gpu-recipes.git cd gpu-recipes export REPO_ROOT=`git rev-parse --show-toplevel` -export RECIPE_ROOT=$REPO_ROOT/training/a4x/nemotron4-340b/nemo-pretraining-gke/32_nodes +export RECIPE_ROOT=$REPO_ROOT/training/a4x/nemotron4-340B/nemo-pretraining-gke/32node-FP8CS-GBS256/recipe cd $RECIPE_ROOT ``` diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/Chart.yaml b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/Chart.yaml new file mode 100644 index 0000000..af46c11 --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/Chart.yaml @@ -0,0 +1,20 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: a4_jobset_workload +description: a4_jobset_workload +type: application +version: 0.1.0 +appVersion: "1.16.0" diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/README.md b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/README.md new file mode 100644 index 0000000..c296fdd --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/README.md @@ -0,0 +1,151 @@ + +# Pretrain qwen3-30b workloads on a4x GKE Node pools with Nvidia Megatron-Bridge Framework + +This recipe outlines the steps for running a qwen3-30b pretraining +workload on [a4x GKE Node pools](https://cloud.google.com/kubernetes-engine) by using the +[NVIDIA Megatron-Bridge framework](https://github.com/NVIDIA-NeMo/Megatron-Bridge). + +## Orchestration and deployment tools + +For this recipe, the following setup is used: + +- Orchestration - [Google Kubernetes Engine (GKE)](https://cloud.google.com/kubernetes-engine) +- Pretraining job configuration and deployment - A Helm chart is used to + configure and deploy the [Kubernetes Jobset](https://kubernetes.io/blog/2025/03/23/introducing-jobset) resource which manages the execution of the + [Megatron-Bridge pretraining workload](https://github.com/NVIDIA-NeMo/Megatron-Bridge). 
+
+## Test environment
+
+This recipe has been optimized for and tested with the following configuration:
+
+- GKE cluster
+Please follow Cluster Toolkit [instructions](https://github.com/GoogleCloudPlatform/cluster-toolkit/tree/main/examples/gke-a4x)
+to create your a4x GKE cluster.
+
+## Training dataset
+
+This recipe uses a mock pretraining dataset provided by the Megatron-Bridge framework.
+
+## Docker container image
+
+This recipe uses the following docker images:
+
+- `nvcr.io/nvidia/nemo:25.11`
+- `us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib-arm64:v1.1.0`
+
+## Run the recipe
+
+From your client workstation, complete the following steps:
+
+### Configure environment settings
+
+Set the environment variables to match your environment:
+
+  ```bash
+  export PROJECT_ID=<PROJECT_ID>
+  export CLUSTER_REGION=<CLUSTER_REGION>
+  export CLUSTER_NAME=<CLUSTER_NAME>
+  export GCS_BUCKET=<GCS_BUCKET> # Note: path should not be prefixed with gs://
+  export KUEUE_NAME=<KUEUE_NAME>
+  ```
+
+Replace the following values:
+
+  - `<PROJECT_ID>`: your Google Cloud project ID.
+  - `<CLUSTER_REGION>`: the region where your cluster is located.
+  - `<CLUSTER_NAME>`: the name of your GKE cluster.
+  - `<GCS_BUCKET>`: the name of your Cloud Storage bucket. Don't include the `gs://` prefix.
+  - `<KUEUE_NAME>`: the name of the Kueue local queue. The default queue created by the cluster toolkit is `a4x`. Make sure to verify the name of the local queue in your cluster.
+
+Set the default project:
+
+  ```bash
+  gcloud config set project $PROJECT_ID
+  ```
+
+### Get the recipe
+
+Clone the `gpu-recipes` repository and set a reference to the recipe folder.
+ +``` +git clone https://github.com/ai-hypercomputer/gpu-recipes.git +cd gpu-recipes +export REPO_ROOT=`git rev-parse --show-toplevel` +export RECIPE_ROOT=$REPO_ROOT/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe +cd $RECIPE_ROOT +``` + +### Get cluster credentials + +``` +gcloud container clusters get-credentials $CLUSTER_NAME --region $CLUSTER_REGION +``` + +### Configure and submit a pretraining job + +#### Using 2 node (8 gpus) fp8-mx precision +To execute the job with the default settings, run the following command from +your client: + +```bash +cd $RECIPE_ROOT +export WORKLOAD_NAME=$USER-a4x-qwen3-30b-2node +helm install $WORKLOAD_NAME . -f values.yaml \ +--set-file workload_launcher=launcher.sh \ +--set workload.image=nvcr.io/nvidia/nemo:25.11 \ +--set volumes.gcsMounts[0].bucketName=${GCS_BUCKET} \ +--set volumes.gcsMounts[0].mountPath=/job-logs \ +--set workload.envs[0].value=/job-logs/$WORKLOAD_NAME \ +--set queue=${KUEUE_NAME} +``` + +**Examples** + +- To set the number of training steps to 100, run the following command from + your client: + + ```bash + cd $RECIPE_ROOT + export WORKLOAD_NAME=$USER-a4x-qwen3-30b-2node + helm install $WORKLOAD_NAME . -f values.yaml \ + --set-file workload_launcher=launcher.sh \ + --set workload.image=nvcr.io/nvidia/nemo:25.11 \ + --set volumes.gcsMounts[0].bucketName=${GCS_BUCKET} \ + --set volumes.gcsMounts[0].mountPath=/job-logs \ + --set workload.envs[0].value=/job-logs/$WORKLOAD_NAME \ + --set queue=${KUEUE_NAME} \ + --set workload.arguments[0]="trainer.max_steps=100" + ``` + +### Monitor the job + +To check the status of pods in your job, run the following command: + +``` +kubectl get pods | grep $USER-a4x-qwen3-30b-2node +``` + +Replace the following: + +- JOB_NAME_PREFIX - your job name prefix. For example $USER-a4x-qwen3-30b-2node. 
+ +To get the logs for one of the pods, run the following command: + +``` +kubectl logs POD_NAME +``` + +Information about the training job's progress, including crucial details such as +loss, step count, and step time, is generated by the rank 0 process. +This process runs on the pod whose name begins with +`JOB_NAME_PREFIX-workload-0-0`. +For example: `$USER-a4x-qwen3-30b-2node-workload-0-0-s9zrv`. + +### Uninstall the Helm release + +You can delete the job and other resources created by the Helm chart. To +uninstall Helm, run the following command from your client: + +```bash +helm uninstall $USER-a4x-qwen3-30b-2node +``` \ No newline at end of file diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/launcher.sh b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/launcher.sh new file mode 100644 index 0000000..ce1a301 --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/launcher.sh @@ -0,0 +1,135 @@ +usage() +{ +cat << EOF +usage: bash ./launcher.sh [config-override [config-override ...]] +config-override (Optional) A NeMo configuration override. E.g. trainer.max_steps=10000. 
+EOF +} + +parse_args() { + while [ "$1" != "" ]; do + case $(grep -o "=" <<< "$1" | wc -l) in + 1 ) + config_overrides+=("$1") + ;; + * ) + echo "Invalid config override: $1" + usage + exit 1 + esac + shift + done + config_overrides="${config_overrides[*]}" +} + +config_overrides=() +parse_args "$@" + +if [ -z "${config_overrides}" ]; then + echo "No NeMo config overrides specified" +else + echo "NeMo config overrides:" + echo " ${config_overrides}" +fi + +export LD_LIBRARY_PATH="$NCCL_PLUGIN_PATH" +ldconfig $LD_LIBRARY_PATH +echo "Added $LD_LIBRARY_PATH to ldconfig:" +ldconfig -p | grep libcuda | sed 's/^/ /' +echo "" + +if [[ -n "${EXPLICIT_LOG_DIR}" ]]; then + explicit_log_dir=${EXPLICIT_LOG_DIR} +else + explicit_log_dir=workload_logs +fi +echo "Logging to ${explicit_log_dir}" + +if [[ -n "${TOKENIZER_PATH}" ]]; then + echo "Getting tokenizer files" + cp ${TOKENIZER_PATH}/* . + echo "" +fi + +echo "Launching Torch distributed on the node rank $JOB_COMPLETION_INDEX out of $NNODES nodes" + +pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger + +# Create the nsys directory. +mkdir -p ${explicit_log_dir}/nsys + + + +cd /opt +rm -rf Megatron-Bridge +git clone https://github.com/NVIDIA-NeMo/Megatron-Bridge.git +cd Megatron-Bridge +git checkout 7695d4acbfac19353d20e456509117efe4733d6b +ls + + + +worker_command=$(cat <<- EOM + if [ "\$RANK" -eq "0" ]; then + echo "Worker 0 is stalling for a few seconds.." 
; + sleep 3 ; + echo "The detected environment within worker rank 0 is:" ; + env | sed 's/^/ /' ; + fi ; + + cd /opt/Megatron-Bridge ; + + numactl \ + --cpunodebind=\$((LOCAL_RANK/2)) \ + --membind=\$((LOCAL_RANK/2)) nsys profile \ + -t nvtx,cuda \ + --cuda-event-trace=false \ + --sample=none \ + --capture-range=cudaProfilerApi \ + --capture-range-end=stop \ + --kill none \ + -o /${explicit_log_dir}/$JOB_IDENTIFIER/rank-\$RANK \ + --force-overwrite true \ + --session-new "nsys-\$RANDOM-\$RANK" \ + nice -10 \ + python scripts/performance/run_script.py \ + --gpu gb200 \ + --model_family_name qwen \ + --model_recipe_name qwen3_30b_a3b \ + --num_gpus 8 \ + --gpus_per_node 4 \ + --compute_dtype fp8_mx \ + --global_batch_size 1024 \ + --micro_batch_size 4 \ + --seq_length 4096 \ + --tensor_model_parallel_size 1 \ + --pipeline_model_parallel_size 1 \ + --context_parallel_size 1 \ + --virtual_pipeline_model_parallel_size None \ + --expert_model_parallel_size 8 \ + --max_steps 50 + +EOM +) + +echo "$worker_command" > worker_command.sh +chmod 777 worker_command.sh + +torchrun \ +--nproc-per-node="4" \ +--nnodes="2" \ +--node_rank="${JOB_COMPLETION_INDEX}" \ +--rdzv_id="${JOB_IDENTIFIER}" \ +--master_addr="${MASTER_ADDR}" \ +--master_port="${MASTER_PORT}" \ +--no-python bash worker_command.sh + + +if [[ "$JOB_COMPLETION_INDEX" == "0" ]]; then + mkdir -p ${ARTIFACT_DIR} + cp -r ${explicit_log_dir}/* ${ARTIFACT_DIR}/ + env > ${ARTIFACT_DIR}/environ.txt + ls ${ARTIFACT_DIR} +fi +echo "Training completed" +echo "Pod on $(hostname --fqdn) is exiting" \ No newline at end of file diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/recipe_launch_command.sh b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/recipe_launch_command.sh new file mode 100644 index 0000000..d4ff835 --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/recipe_launch_command.sh @@ -0,0 +1 
@@
+helm install $USER-a4x-qwen3-30b-2node . -f values.yaml --set-file workload_launcher=launcher.sh --set workload.image=nvcr.io/nvidia/nemo:25.11 --set volumes.gcsMounts[0].bucketName=${GCS_BUCKET} --set volumes.gcsMounts[0].mountPath=/job-logs --set workload.envs[0].value=/job-logs/$USER-a4x-qwen3-30b-2node --set queue=${KUEUE_NAME}
\ No newline at end of file
diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-config-configmap.yaml b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-config-configmap.yaml
new file mode 100644
index 0000000..a1d54ce
--- /dev/null
+++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-config-configmap.yaml
@@ -0,0 +1,28 @@
+# yamllint disable
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +{{- if .Values.workload.configFile }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Release.Name }}-config" +data: + workload-configuration: |- +{{- if .Values.workload_config }} +{{ .Values.workload_config | nindent 4 }} +{{- else }} +{{ "config: null" | nindent 4 }} +{{- end }} +{{- end }} diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-job.yaml b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-job.yaml new file mode 100644 index 0000000..e2b6d54 --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-job.yaml @@ -0,0 +1,352 @@ +# yamllint disable +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{$timestamp := now | date "2006-01-02-15-04-05"}} +{{$jobSuffix := randAlphaNum 4 | lower}} +{{$jobuuid := uuidv4}} +{{$nodes := div .Values.workload.gpus 4 | max 1}} +{{$gpusPerNode := min .Values.workload.gpus 4}} +{{- $root := . 
-}} +apiVersion: resource.nvidia.com/v1beta1 +kind: ComputeDomain +metadata: + name: "{{ .Release.Name }}-{{ $jobSuffix }}" +spec: + numNodes: {{ $nodes }} + channel: + resourceClaimTemplate: + name: "{{ .Release.Name }}-{{ $jobSuffix }}" +--- +apiVersion: jobset.x-k8s.io/v1alpha2 +kind: JobSet +metadata: + name: "{{ .Release.Name }}" + namespace: default + labels: + {{- if $root.Values.queue }} + kueue.x-k8s.io/queue-name: "{{ $root.Values.queue }}" + {{- end }} +spec: + {{- if $root.Values.queue }} + suspend: true + {{- end }} + failurePolicy: + maxRestarts: {{ default 0 $root.Values.workload.max_workload_restarts }} + replicatedJobs: + - name: workload + replicas: 1 + template: + spec: + parallelism: {{ $nodes }} + completions: {{ $nodes }} + backoffLimit: 0 + completionMode: Indexed + activeDeadlineSeconds: 14400 # 4 hours (4 * 60 * 60) + ttlSecondsAfterFinished: 43200 # 12 hours (12 * 60 * 60) + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: workload + {{- if $root.Values.volumes.gcsVolumes }} + gke-gcsfuse/volumes: "true" + gke-gcsfuse/cpu-limit: "0" + gke-gcsfuse/memory-limit: "0" + gke-gcsfuse/ephemeral-storage-limit: "0" + {{- end }} + {{- if $root.Values.volumes.psVolumes }} + gke-parallelstore/volumes: "true" + gke-parallelstore/cpu-limit: "0" + gke-parallelstore/memory-limit: "0" + {{- end }} + {{- if and $root.Values.queue $root.Values.tasSettings.topologyRequest }} + {{- toYaml .Values.tasSettings.topologyRequest | nindent 14 }} + {{- end }} + {{- if and $root.Values.queue $root.Values.dwsSettings.maxRunDurationSeconds }} + provreq.kueue.x-k8s.io/maxRunDurationSeconds: "{{ $root.Values.dwsSettings.maxRunDurationSeconds }}" + {{- end }} + {{- if not $root.Values.network.hostNetwork }} + networking.gke.io/default-interface: "eth0" + networking.gke.io/interfaces: | + {{- if $root.Values.network.subnetworks }} + [ + {{- range $i, $subnetwork := $root.Values.network.subnetworks }} + {"interfaceName":"eth{{ $i 
}}","network":"{{ $subnetwork }}"}{{ eq $i 5 | ternary "" ","}} + {{- end }} + ] + {{- else }} + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gvnic-1"}, + {{- range $i := until 4 }} + {"interfaceName":"eth{{ add 2 $i }}","network":"rdma-{{ $i }}"}{{ eq $i 3 | ternary "" ","}} + {{- end }} + ] + {{- end }} + {{- end }} + spec: + {{- if $root.Values.network.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + {{- end }} + subdomain: "{{.Release.Name}}" + restartPolicy: Never + {{- if $root.Values.targetNodes }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: "In" + values: + {{- range $hostname := $root.Values.targetNodes }} + - {{ $hostname }} + {{- end }} + {{- end }} + {{- if $root.Values.avoidNodes }} + {{- if not $root.Values.targetNodes }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + {{- end }} + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: "NotIn" + values: + {{- range $hostname := $root.Values.avoidNodes }} + - {{ $hostname }} + {{- end }} + {{- end }} + tolerations: + - operator: "Exists" + key: nvidia.com/gpu + - operator: "Exists" + key: cloud.google.com/impending-node-termination + - key: "kubernetes.io/arch" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + + volumes: + {{ if $root.Values.network.gibVersion }} + - name: gib + emptyDir: {} + {{ end }} + + {{- if $root.Values.workload.configFile }} + - name: workload-configuration + configMap: + name: "{{.Release.Name}}-config" + items: + - key: workload-configuration + path: {{ $root.Values.workload.configFile | default "workload-configuration" }} + {{- end }} + + - name: workload-launcher + configMap: + name: "{{.Release.Name}}-launcher" + + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + + {{- range $pvc := 
$root.Values.volumes.pvcMounts }} + - name: "{{ $pvc.claimName }}" + persistentVolumeClaim: + claimName: "{{ $pvc.claimName }}" + {{- end }} + + {{- range $gcs := $root.Values.volumes.gcsMounts }} + - name: "{{ $gcs.bucketName }}" + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: "{{ $gcs.bucketName }}" + {{- if $gcs.mountOptions }} + mountOptions: "{{ $gcs.mountOptions }}" + {{- end }} + {{- end}} + + {{- if $root.Values.volumes.ssdMountPath }} + - name: local-ssd + hostPath: + path: /mnt/stateful_partition/kube-ephemeral-ssd + {{- end }} + + initContainers: + {{ if $root.Values.network.gibVersion }} + - name: nccl-plugin-installer + image: {{ $root.Values.network.gibVersion }} + imagePullPolicy: Always + args: + - | + set -ex + /scripts/container_entry.sh install --install-nccl + cp -R /var/lib/gib/lib64/. /target/usr/local/gib/lib64 + cp -R /var/lib/gib/. /target/usr/local/gib + command: + - /bin/sh + - -c + volumeMounts: + - mountPath: /target/usr/local/gib + name: gib + {{ end}} + + resourceClaims: + - name: compute-domain-channel + resourceClaimTemplateName: "{{ .Release.Name }}-{{ $jobSuffix }}" + + containers: + {{- if $root.Values.workload.gcsSidecarImage }} + - name: gke-gcsfuse-sidecar + image: {{ $root.Values.workload.gcsSidecarImage }} + - name: gke-gcsfuse-metadata-prefetch + image: {{ $root.Values.workload.gcsSidecarImage }} + {{- end }} + {{- if $root.Values.workload.psSidecarImage }} + - name: gke-parallelstore-sidecar + image: {{ $root.Values.workload.psSidecarImage }} + {{- end }} + + - name: workload + image: "{{ $root.Values.workload.image }}" + imagePullPolicy: Always + {{- if $root.Values.network.hostNetwork }} + securityContext: + privileged: true + {{- end }} + env: + - name: JOB_IDENTIFIER + value: "{{ .Release.Name }}-{{ $timestamp }}" + - name: JOB_TIMESTAMP + value: "{{ $timestamp }}" + - name: JOB_UUID + value: "{{ $jobuuid }}" + - name: JOB_ORCHESTRATOR + value: "gke" + # Add RANK based on the pod's index 
provided by the Indexed Job + # This is crucial for torch.distributed initialization. + - name: JOB_COMPLETION_INDEX + valueFrom: + fieldRef: + fieldPath: metadata.annotations['batch.kubernetes.io/job-completion-index'] + - name: RANK_0_FQDN + value: "{{.Release.Name}}-workload-0-0.{{.Release.Name}}.default.svc.cluster.local" + - name: HOSTNAME_PREFIX + value: "{{.Release.Name}}-workload-" + - name: DOMAIN_NAME + value: "{{.Release.Name}}.default.svc.cluster.local" + - name: MASTER_ADDR + value: "{{.Release.Name}}-workload-0-0.{{.Release.Name}}.default.svc.cluster.local" + - name: MASTER_PORT + value: "6002" + - name: WORLD_SIZE + value: "{{ $root.Values.workload.gpus }}" + - name: NNODES + value: "{{ $nodes }}" + - name: GPUS_PER_NODE + value: "{{ $gpusPerNode }}" + + - name: NCCL_PLUGIN_PATH + value: /usr/local/gib/lib64 + + {{ if $root.Values.network.gibVersion }} + - name: NCCL_INIT_SCRIPT + value: "/usr/local/gib/scripts/set_nccl_env.sh" + {{ end }} + + {{ if $root.Values.network.ncclSettings }} + {{- toYaml .Values.network.ncclSettings | nindent 14 }} + {{ end }} + + {{ if $root.Values.workload.envs }} + {{- toYaml .Values.workload.envs | nindent 14 }} + {{ end }} + + command: + - bash + - -c + - | + echo "Pod on $(hostname --fqdn) is running" + echo "Pod is assigned job index of $JOB_COMPLETION_INDEX" + + if [[ -n "${NCCL_INIT_SCRIPT}" ]]; then + echo "Running NCCL init script: ${NCCL_INIT_SCRIPT}" + source ${NCCL_INIT_SCRIPT} + fi + + # Overriding NCCL_SOCKET_IFNAME definition + export NCCL_SOCKET_IFNAME="eth0,eth1" + export NCCL_TUNER_CONFIG_PATH=/usr/local/gib/configs/tuner_config_a3u.txtpb + + echo "Launching workload with the following arguments:" + {{- range $root.Values.workload.defaultArguments }} + echo " {{ . }}" + {{- end }} + {{- range $root.Values.workload.arguments }} + echo " {{ . }}" + {{- end }} + echo "" + + sleep 10 + + bash /workload/launcher/launch-workload.sh \ + {{- range $root.Values.workload.defaultArguments }} + {{ . 
}} \ + {{- end }} + {{- range $root.Values.workload.arguments }} + {{ . }} \ + {{- end }} + + + volumeMounts: + {{ if $root.Values.network.gibVersion }} + - name: gib + mountPath: /usr/local/gib + {{ end }} + + {{- if $root.Values.workload.configFile }} + - name: workload-configuration + mountPath: {{ $root.Values.workload.configPath | default "/workload/configs" }} + {{- end }} + + - name: workload-launcher + mountPath: /workload/launcher + + - name: shared-memory + mountPath: /dev/shm + + {{- range $pvc := $root.Values.volumes.pvcMounts }} + - name: "{{ $pvc.claimName }}" + mountPath: "{{ $pvc.mountPath }}" + {{- end }} + + {{- range $gcs := $root.Values.volumes.gcsMounts }} + - name: "{{ $gcs.bucketName }}" + mountPath: "{{ $gcs.mountPath }}" + {{- end }} + + {{- if $root.Values.volumes.ssdMountPath }} + - name: local-ssd + mountPath: "{{ $root.Values.volumes.ssdMountPath }}" + {{- end }} + + resources: + limits: + nvidia.com/gpu: {{ $gpusPerNode }} + claims: + - name: compute-domain-channel diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-launcher-configmap.yaml b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-launcher-configmap.yaml new file mode 100644 index 0000000..7026e0f --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-launcher-configmap.yaml @@ -0,0 +1,28 @@ +# yamllint disable +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Release.Name }}-launcher" +data: + launch-workload.sh: |- +{{- if .Values.workload_launcher }} +{{ .Values.workload_launcher | nindent 4 }} +{{- else }} + #!/bin/bash + echo "No workload launcher specified" + exit 1 +{{- end }} diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-svc.yaml b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-svc.yaml new file mode 100644 index 0000000..7cfe220 --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/templates/workload-svc.yaml @@ -0,0 +1,22 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: Service +metadata: + name: "{{ .Release.Name }}" +spec: + clusterIP: None + selector: + jobset.sigs.k8s.io/jobset-name: "{{ .Release.Name }}" diff --git a/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/values.yaml b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/values.yaml new file mode 100644 index 0000000..2727823 --- /dev/null +++ b/training/a4x/qwen3-30b/megatron-bridge-pretraining-gke/2node-FP8MX-GBS1024/recipe/values.yaml @@ -0,0 +1,31 @@ +dwsSettings: + maxRunDurationSeconds: null +network: + gibVersion: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib-arm64:v1.1.0 + hostNetwork: true + ncclSettings: + - name: NCCL_DEBUG + value: WARN + subnetworks[]: null +queue: null +tasSettings: + topologyRequest: + kueue.x-k8s.io/podset-preferred-topology: kubernetes.io/hostname +volumes: + gcsMounts: + - bucketName: null + mountPath: null + gcsVolumes: true + psVolumes: false +workload: + arguments[]: null + configFile: null + configPath: null + defaultArguments[]: null + envs: + - name: ARTIFACT_DIR + value: null + - name: GLOO_SOCKET_IFNAME + value: eth0 + gpus: 8 + image: nvcr.io/nvidia/nemo:25.11