Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions examples/rl/grpo/nl2sql/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# NL2SQL GRPO Example

This example trains a model to translate natural language questions into SQL
over a small SQLite database using GRPO. Rewards are based on execution success
and exact result matching.

## Setup

Build the SQLite database:

```bash
python3 examples/rl/grpo/nl2sql/build_db.py
```

## Train

Run GRPO with the provided config:

```bash
python3 -m tunix.cli.grpo_main \
examples/rl/grpo/nl2sql/configs/base_config.yaml
```

## Notes

- The model should output a single SQL SELECT statement.
- The reward function is in `tunix/cli/reward_fn/nl2sql.py`.

45 changes: 45 additions & 0 deletions examples/rl/grpo/nl2sql/build_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Builds the SQLite database for the NL2SQL GRPO example."""

from __future__ import annotations

import sqlite3
from pathlib import Path


def main() -> None:
    """Create (or recreate) the example SQLite database from ``schema.sql``.

    Reads the schema file located next to this script, deletes any existing
    ``example.sqlite``, and executes the schema to build a fresh database.

    Raises:
        FileNotFoundError: If ``schema.sql`` is missing.
    """
    example_dir = Path(__file__).resolve().parent
    schema_path = example_dir / "schema.sql"
    db_path = example_dir / "example.sqlite"

    if not schema_path.exists():
        raise FileNotFoundError(f"Missing schema file: {schema_path}")

    # Delete any previous database so repeated runs are idempotent.
    if db_path.exists():
        db_path.unlink()

    schema_sql = schema_path.read_text(encoding="utf-8")
    conn = sqlite3.connect(db_path)
    try:
        # The sqlite3 connection context manager commits on success (and
        # rolls back on error) but does NOT close the connection, so an
        # explicit close in ``finally`` is still required.
        with conn:
            conn.executescript(schema_sql)
    finally:
        conn.close()

    print(f"SQLite DB created at {db_path}")


if __name__ == "__main__":
    main()

89 changes: 89 additions & 0 deletions examples/rl/grpo/nl2sql/configs/base_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Base GRPO config for the NL2SQL example: trains Qwen2.5-0.5B-Instruct to
# emit SQLite SELECT statements, scored by the reward function listed under
# reward_functions below.
# NOTE(review): field semantics follow the tunix.cli.grpo_main config schema;
# confirm key nesting against that parser when editing.
model_config:
  model_name: "qwen2.5-0.5b-instruct"
  model_id: "Qwen/Qwen2.5-0.5B-Instruct"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
actor_model_config:
  lora_config: null
  model_display: false
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
# Reference and rollout models reuse the same base checkpoint as model_config.
reference_model_config:
  model_name: "qwen2.5-0.5b-instruct"
  model_id: "Qwen/Qwen2.5-0.5B-Instruct"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
rollout_model_config:
  model_name: "qwen2.5-0.5b-instruct"
  model_id: "Qwen/Qwen2.5-0.5B-Instruct"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
tokenizer_config:
  tokenizer_type: "huggingface"
  tokenizer_path: "Qwen/Qwen2.5-0.5B-Instruct"
  add_bos: False
  add_eos: True
# Dataset is produced by create_dataset in the example's data.py.
data_module: "examples/rl/grpo/nl2sql/data.py:create_dataset"
dataset_name: "nl2sql"
batch_size: 1
num_batches: 10
num_test_batches: 2
num_train_epochs: 1
rl_training_config:
  actor_optimizer_config:
    opt_type: "adamw"
    peak_value: 3e-6
    schedule_type: "warmup_cosine_decay_schedule"
    init_value: 0.0
    end_value: 0.0
    warmup_ratio: 0.1
    warmup_steps: 1
    decay_steps: 10
    b1: 0.9
    b2: 0.99
    weight_decay: 0.1
    max_grad_norm: 0.1
  eval_every_n_steps: 2
  max_steps: 10
  metrics_logging_options:
    log_dir: "/tmp/tensorboard/grpo_nl2sql_qwen2p5_0p5b"
    flush_every_n_steps: 20
  checkpointing_options:
    save_interval_steps: 10
    max_to_keep: 2
  profiler_options: {}
# Low-temperature sampling for deterministic-ish SQL generation.
rollout_config:
  total_generation_steps: 256
  max_prompt_length: 256
  temperature: 0.2
  top_p: 0.95
  top_k: 50
  rollout_engine: "vanilla"
  offload_to_cpu: False
  verl_compatible: false
grpo_config:
  num_generations: 2
  num_iterations: 1
  beta: 0.08
  epsilon: 0.2
reward_functions:
  - "tunix/cli/reward_fn/nl2sql.py"

89 changes: 89 additions & 0 deletions examples/rl/grpo/nl2sql/configs/debug_max_steps_1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Debug variant of the NL2SQL GRPO config: uses the base (non-instruct)
# Qwen2.5-0.5B model and runs a single training step (max_steps: 1) for
# fast smoke-testing of the pipeline.
# NOTE(review): field semantics follow the tunix.cli.grpo_main config schema;
# confirm key nesting against that parser when editing.
model_config:
  model_name: "qwen2.5-0.5b"
  model_id: "Qwen/Qwen2.5-0.5B"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
actor_model_config:
  lora_config: null
  model_display: false
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
# Reference and rollout models reuse the same base checkpoint as model_config.
reference_model_config:
  model_name: "qwen2.5-0.5b"
  model_id: "Qwen/Qwen2.5-0.5B"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
  rng_seed: 42
rollout_model_config:
  model_name: "qwen2.5-0.5b"
  model_id: "Qwen/Qwen2.5-0.5B"
  model_source: "huggingface"
  model_display: false
  intermediate_ckpt_dir: "/tmp/intermediate_ckpt/nl2sql_qwen2p5_0p5b"
  model_download_path: "/tmp/models"
  mesh:
    shape: "(1,1)"
    axis_names: "('fsdp','tp')"
tokenizer_config:
  tokenizer_type: "huggingface"
  tokenizer_path: "Qwen/Qwen2.5-0.5B"
  add_bos: False
  add_eos: True
# Dataset is produced by create_dataset in the example's data.py.
data_module: "examples/rl/grpo/nl2sql/data.py:create_dataset"
dataset_name: "nl2sql"
batch_size: 1
num_batches: 10
num_test_batches: 2
num_train_epochs: 1
rl_training_config:
  actor_optimizer_config:
    opt_type: "adamw"
    peak_value: 3e-6
    schedule_type: "warmup_cosine_decay_schedule"
    init_value: 0.0
    end_value: 0.0
    warmup_ratio: 0.1
    warmup_steps: 1
    decay_steps: 10
    b1: 0.9
    b2: 0.99
    weight_decay: 0.1
    max_grad_norm: 0.1
  # Debug overrides: evaluate and stop after one step.
  eval_every_n_steps: 1
  max_steps: 1
  metrics_logging_options:
    log_dir: "/tmp/tensorboard/grpo_nl2sql_qwen2p5_0p5b"
    flush_every_n_steps: 20
  checkpointing_options:
    save_interval_steps: 10
    max_to_keep: 2
  profiler_options: {}
# Higher-temperature sampling than the base config (exploratory rollouts).
rollout_config:
  total_generation_steps: 256
  max_prompt_length: 256
  temperature: 0.9
  top_p: 1.0
  top_k: 50
  rollout_engine: "vanilla"
  offload_to_cpu: False
  verl_compatible: false
grpo_config:
  num_generations: 2
  num_iterations: 1
  beta: 0.08
  epsilon: 0.2
reward_functions:
  - "tunix/cli/reward_fn/nl2sql.py"

84 changes: 84 additions & 0 deletions examples/rl/grpo/nl2sql/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Dataset loader for the NL2SQL GRPO example."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import grain


# System message: constrains the model to emit exactly one SQLite SELECT
# statement with no surrounding prose, comments, or markdown.
SYSTEM_PROMPT = (
    "You translate natural language questions into SQLite SQL. "
    "Only output a single SQL SELECT statement. "
    "Do not include explanations, comments, or markdown."
)

# Compact textual description of the example database schema; injected into
# every prompt via USER_TEMPLATE's {schema} slot.
SCHEMA_TEXT = """Tables:
customers(customer_id, name, city)
products(product_id, name, price)
orders(order_id, customer_id, product_id, order_date, quantity)"""

# One-shot user prompt template: schema, a worked example, then the actual
# question. Filled by create_dataset via str.format (keys: schema, question).
USER_TEMPLATE = """Schema:
{schema}

Example:
Question: How many customers are there?
SQL: SELECT COUNT(*) FROM customers;

Question: {question}
SQL:"""


def _load_jsonl(path: Path) -> list[dict[str, Any]]:
records: list[dict[str, Any]] = []
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line:
continue
records.append(json.loads(line))
return records


def create_dataset(data_path: str | None = None) -> grain.MapDataset:
    """Builds the NL2SQL prompt dataset from the bundled JSONL file.

    Args:
        data_path: Optional path to a JSONL file; defaults to
            ``nl2sql_data.jsonl`` next to this module.

    Returns:
        A ``grain.MapDataset`` of chat-formatted prompt records.

    Raises:
        FileNotFoundError: If the dataset file does not exist.
    """
    default_path = Path(__file__).resolve().parent / "nl2sql_data.jsonl"
    dataset_path = Path(data_path) if data_path else default_path

    if not dataset_path.exists():
        raise FileNotFoundError(f"Missing dataset file: {dataset_path}")

    def _to_record(raw: dict[str, Any]) -> dict[str, Any]:
        # Build the chat-style prompt expected by the trainer's templating.
        user_msg = USER_TEMPLATE.format(
            schema=SCHEMA_TEXT, question=raw["question"]
        )
        return {
            "prompt": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            "id": raw["id"],
            "question": raw["question"],
            "gold_sql": raw["gold_sql"],
            "gold_result": raw["gold_result"],
        }

    return grain.MapDataset.source(_load_jsonl(dataset_path)).map(_to_record)

Loading