Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions examples/rl/grpo/nl2sql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# NL2SQL GRPO Example

This example trains a model to translate natural language questions into SQL
over a small SQLite database using GRPO. Rewards are based on execution success
and exact result matching.

## Setup

Build the SQLite database:

```bash
python3 examples/rl/grpo/nl2sql/build_db.py
```

## Train

Run GRPO with the provided config:

```bash
python3 -m tunix.cli.grpo_main \
examples/rl/grpo/nl2sql/configs/base_config.yaml
```

## Notes

- The model should output a single SQL SELECT statement.
- The reward function is in `tunix/cli/reward_fn/nl2sql.py`.

45 changes: 45 additions & 0 deletions examples/rl/grpo/nl2sql/build_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Builds the SQLite database for the NL2SQL GRPO example."""

from __future__ import annotations

import sqlite3
from pathlib import Path


def main() -> None:
    """Create (or recreate) the example SQLite database from ``schema.sql``.

    Reads the schema file located next to this script, deletes any existing
    ``example.sqlite``, and executes the schema to build a fresh database.

    Raises:
        FileNotFoundError: If ``schema.sql`` is missing.
    """
    example_dir = Path(__file__).resolve().parent
    schema_path = example_dir / "schema.sql"
    db_path = example_dir / "example.sqlite"

    if not schema_path.exists():
        raise FileNotFoundError(f"Missing schema file: {schema_path}")

    # Delete any previous database so repeated runs are idempotent.
    if db_path.exists():
        db_path.unlink()

    schema_sql = schema_path.read_text(encoding="utf-8")
    conn = sqlite3.connect(db_path)
    try:
        # The sqlite3 connection context manager commits on success (and
        # rolls back on error) but does NOT close the connection, so an
        # explicit close in ``finally`` is still required.
        with conn:
            conn.executescript(schema_sql)
    finally:
        conn.close()

    print(f"SQLite DB created at {db_path}")


if __name__ == "__main__":
    main()

89 changes: 89 additions & 0 deletions examples/rl/grpo/nl2sql/configs/base_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Base GRPO config for the NL2SQL example: trains Qwen2.5-0.5B-Instruct to
# emit SQLite SELECT statements, scored by the reward function listed under
# reward_functions below.
# NOTE(review): field semantics follow the tunix.cli.grpo_main config schema;
# confirm key nesting against that parser when editing.
model_config:
  model_name: "qwen2.5-0.5b-instruct"
  model_id: "Qwen/Qwen2.5-0.5B-Instruct"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
actor_model_config:
  lora_config: null
  model_display: false
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
# Reference and rollout models reuse the same base checkpoint as model_config.
reference_model_config:
  model_name: "qwen2.5-0.5b-instruct"
  model_id: "Qwen/Qwen2.5-0.5B-Instruct"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
rollout_model_config:
  model_name: "qwen2.5-0.5b-instruct"
  model_id: "Qwen/Qwen2.5-0.5B-Instruct"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
tokenizer_config:
  tokenizer_type: "huggingface"
  tokenizer_path: "Qwen/Qwen2.5-0.5B-Instruct"
  add_bos: False
  add_eos: True
# Dataset is produced by create_dataset in the example's data.py.
data_module: "examples/rl/grpo/nl2sql/data.py:create_dataset"
dataset_name: "nl2sql"
batch_size: 1
num_batches: 10
num_test_batches: 2
num_train_epochs: 1
rl_training_config:
  actor_optimizer_config:
    opt_type: "adamw"
    peak_value: 3e-6
    schedule_type: "warmup_cosine_decay_schedule"
    init_value: 0.0
    end_value: 0.0
    warmup_ratio: 0.1
    warmup_steps: 1
    decay_steps: 10
    b1: 0.9
    b2: 0.99
    weight_decay: 0.1
    max_grad_norm: 0.1
  eval_every_n_steps: 2
  max_steps: 10
  metrics_logging_options:
    log_dir: "/tmp/tensorboard/grpo_nl2sql_qwen2p5_0p5b"
    flush_every_n_steps: 20
  checkpointing_options:
    save_interval_steps: 10
    max_to_keep: 2
  profiler_options: {}
# Low-temperature sampling for deterministic-ish SQL generation.
rollout_config:
  total_generation_steps: 256
  max_prompt_length: 256
  temperature: 0.2
  top_p: 0.95
  top_k: 50
  rollout_engine: "vanilla"
  offload_to_cpu: False
  verl_compatible: false
grpo_config:
  num_generations: 2
  num_iterations: 1
  beta: 0.08
  epsilon: 0.2
reward_functions:
  - "tunix/cli/reward_fn/nl2sql.py"

89 changes: 89 additions & 0 deletions examples/rl/grpo/nl2sql/configs/debug_max_steps_1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Debug variant of the NL2SQL GRPO config: uses the base (non-instruct)
# Qwen2.5-0.5B model and runs a single training step (max_steps: 1) for
# fast smoke-testing of the pipeline.
# NOTE(review): field semantics follow the tunix.cli.grpo_main config schema;
# confirm key nesting against that parser when editing.
model_config:
  model_name: "qwen2.5-0.5b"
  model_id: "Qwen/Qwen2.5-0.5B"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
actor_model_config:
  lora_config: null
  model_display: false
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
# Reference and rollout models reuse the same base checkpoint as model_config.
reference_model_config:
  model_name: "qwen2.5-0.5b"
  model_id: "Qwen/Qwen2.5-0.5B"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
rollout_model_config:
  model_name: "qwen2.5-0.5b"
  model_id: "Qwen/Qwen2.5-0.5B"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
tokenizer_config:
  tokenizer_type: "huggingface"
  tokenizer_path: "Qwen/Qwen2.5-0.5B"
  add_bos: False
  add_eos: True
# Dataset is produced by create_dataset in the example's data.py.
data_module: "examples/rl/grpo/nl2sql/data.py:create_dataset"
dataset_name: "nl2sql"
batch_size: 1
num_batches: 10
num_test_batches: 2
num_train_epochs: 1
rl_training_config:
  actor_optimizer_config:
    opt_type: "adamw"
    peak_value: 3e-6
    schedule_type: "warmup_cosine_decay_schedule"
    init_value: 0.0
    end_value: 0.0
    warmup_ratio: 0.1
    warmup_steps: 1
    decay_steps: 10
    b1: 0.9
    b2: 0.99
    weight_decay: 0.1
    max_grad_norm: 0.1
  # Debug overrides: evaluate and stop after one step.
  eval_every_n_steps: 1
  max_steps: 1
  metrics_logging_options:
    log_dir: "/tmp/tensorboard/grpo_nl2sql_qwen2p5_0p5b"
    flush_every_n_steps: 20
  checkpointing_options:
    save_interval_steps: 10
    max_to_keep: 2
  profiler_options: {}
# Higher-temperature sampling than the base config (exploratory rollouts).
rollout_config:
  total_generation_steps: 256
  max_prompt_length: 256
  temperature: 0.9
  top_p: 1.0
  top_k: 50
  rollout_engine: "vanilla"
  offload_to_cpu: False
  verl_compatible: false
grpo_config:
  num_generations: 2
  num_iterations: 1
  beta: 0.08
  epsilon: 0.2
reward_functions:
  - "tunix/cli/reward_fn/nl2sql.py"

84 changes: 84 additions & 0 deletions examples/rl/grpo/nl2sql/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Dataset loader for the NL2SQL GRPO example."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import grain


# System message: constrains the model to emit exactly one SQLite SELECT
# statement with no surrounding prose, comments, or markdown.
SYSTEM_PROMPT = (
    "You translate natural language questions into SQLite SQL. "
    "Only output a single SQL SELECT statement. "
    "Do not include explanations, comments, or markdown."
)

# Compact textual description of the example database schema; injected into
# every prompt via USER_TEMPLATE's {schema} slot.
SCHEMA_TEXT = """Tables:
customers(customer_id, name, city)
products(product_id, name, price)
orders(order_id, customer_id, product_id, order_date, quantity)"""

# One-shot user prompt template: schema, a worked example, then the actual
# question. Filled by create_dataset via str.format (keys: schema, question).
USER_TEMPLATE = """Schema:
{schema}

Example:
Question: How many customers are there?
SQL: SELECT COUNT(*) FROM customers;

Question: {question}
SQL:"""


def _load_jsonl(path: Path) -> list[dict[str, Any]]:
records: list[dict[str, Any]] = []
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line:
continue
records.append(json.loads(line))
return records


def create_dataset(data_path: str | None = None) -> grain.MapDataset:
    """Builds the NL2SQL prompt dataset from the bundled JSONL file.

    Args:
        data_path: Optional path to a JSONL file; defaults to
            ``nl2sql_data.jsonl`` next to this module.

    Returns:
        A ``grain.MapDataset`` of chat-formatted prompt records.

    Raises:
        FileNotFoundError: If the dataset file does not exist.
    """
    default_path = Path(__file__).resolve().parent / "nl2sql_data.jsonl"
    dataset_path = Path(data_path) if data_path else default_path

    if not dataset_path.exists():
        raise FileNotFoundError(f"Missing dataset file: {dataset_path}")

    def _to_record(raw: dict[str, Any]) -> dict[str, Any]:
        # Build the chat-style prompt expected by the trainer's templating.
        user_msg = USER_TEMPLATE.format(
            schema=SCHEMA_TEXT, question=raw["question"]
        )
        return {
            "prompt": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            "id": raw["id"],
            "question": raw["question"],
            "gold_sql": raw["gold_sql"],
            "gold_result": raw["gold_result"],
        }

    return grain.MapDataset.source(_load_jsonl(dataset_path)).map(_to_record)

Loading