-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_prompts.py
More file actions
219 lines (173 loc) · 7.81 KB
/
Copy pathgenerate_prompts.py
File metadata and controls
219 lines (173 loc) · 7.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/env python3
"""
Generate prompt.md files for MCode benchmarks.
Reads config.toml to determine which benchmarks to process.
Supports both 'server' and 'cli' benchmark types.
Templates are stored in templates/<version>/ directory as Jinja2 files:
- prompt_cli.md.j2 - Template for CLI benchmark prompts
- prompt_server.md.j2 - Template for server benchmark prompts
Template versions:
- v1: Original minimal template (used by initial experiments)
- v2: Improved template with drop-in replacement emphasis
"""
# Template version used when the caller does not request one explicitly
# (default of get_jinja_env, generate_prompt and the --template-version flag).
DEFAULT_TEMPLATE_VERSION = "v1" # Default to v1 for backward compatibility
import yaml
import tomllib
import argparse
import sys
from pathlib import Path
from typing import Optional
from jinja2 import Environment, FileSystemLoader, TemplateNotFound
def load_config(config_path: Path) -> dict:
"""Load configuration from TOML file."""
if not config_path.exists():
return {}
with open(config_path, 'rb') as f:
return tomllib.load(f)
def load_benchmark_config(benchmark_dir: Path) -> Optional[dict]:
    """Load benchmark.yml (plus the ID from metadata.yml) for one benchmark.

    Args:
        benchmark_dir: Directory expected to contain ``benchmark.yml`` and,
            optionally, ``metadata.yml``.

    Returns:
        The parsed benchmark.yml mapping with two extra keys added:
        ``_id`` (the ``id`` value from metadata.yml, or ``"000"`` when it
        is unavailable) and ``_dir`` (``benchmark_dir``). ``None`` when
        benchmark.yml is missing, does not parse to a mapping, or has no
        ``benchmark.type`` field.
    """
    benchmark_file = benchmark_dir / "benchmark.yml"
    metadata_file = benchmark_dir / "metadata.yml"

    if not benchmark_file.exists():
        return None

    with open(benchmark_file, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty document and a scalar/list for
    # other shapes; none of those can describe a benchmark.
    if not isinstance(config, dict):
        return None

    # Skip benchmarks missing required fields
    benchmark_section = config.get('benchmark')
    if not isinstance(benchmark_section, dict) or 'type' not in benchmark_section:
        return None

    # Load ID from metadata.yml
    benchmark_id = "000"
    if metadata_file.exists():
        with open(metadata_file, encoding="utf-8") as f:
            metadata = yaml.safe_load(f)
        # An empty metadata.yml parses to None; keep the default ID then.
        if isinstance(metadata, dict):
            benchmark_id = metadata.get("id", "000")

    config['_id'] = benchmark_id
    config['_dir'] = benchmark_dir
    return config
def discover_benchmarks(dataset_dir: Path) -> dict:
    """Discover all benchmarks in the dataset directory.

    Args:
        dataset_dir: Directory whose immediate subdirectories are scanned
            for a ``benchmark.yml`` file.

    Returns:
        Dict mapping each benchmark's ``_id`` to its loaded configuration.
        Empty when ``dataset_dir`` is not a directory or contains no
        loadable benchmark.
    """
    benchmarks = {}
    if not dataset_dir.is_dir():
        return benchmarks

    # iterdir() order is filesystem-dependent; sort so the result (and in
    # particular which directory wins an ID clash) is reproducible.
    for item in sorted(dataset_dir.iterdir()):
        if not (item.is_dir() and (item / "benchmark.yml").exists()):
            continue
        config = load_benchmark_config(item)
        if not config:
            continue
        benchmark_id = config['_id']
        if benchmark_id in benchmarks:
            # Two directories share an ID (e.g. both lack metadata.yml and
            # fall back to the default). Report it instead of overwriting.
            print(
                f"Warning: duplicate benchmark ID '{benchmark_id}' in {item} "
                f"(already used by {benchmarks[benchmark_id]['_dir']}); skipping",
                file=sys.stderr,
            )
            continue
        benchmarks[benchmark_id] = config
    return benchmarks
# Cache for Jinja2 environments (one per version)
_jinja_envs = {}


def get_jinja_env(version: str = DEFAULT_TEMPLATE_VERSION) -> Environment:
    """Get or create the Jinja2 environment for a specific template version.

    Args:
        version: Name of a subdirectory of ``templates/`` located next to
            this file.

    Returns:
        The cached ``Environment`` for ``version``; on first use it is
        created with ``trim_blocks`` and ``lstrip_blocks`` enabled.

    Raises:
        ValueError: If ``templates/<version>`` is not a directory.
    """
    # No ``global`` statement needed: the cache dict is mutated, never rebound.
    env = _jinja_envs.get(version)
    if env is None:
        templates_dir = Path(__file__).parent / "templates" / version
        # is_dir() rather than exists(): a plain file with the version's
        # name must be rejected here, not later as a missing template.
        if not templates_dir.is_dir():
            raise ValueError(f"Template version '{version}' not found at {templates_dir}")
        env = Environment(
            loader=FileSystemLoader(templates_dir),
            trim_blocks=True,
            lstrip_blocks=True,
        )
        _jinja_envs[version] = env
    return env
def generate_prompt(config: dict, template_version: str = DEFAULT_TEMPLATE_VERSION) -> str:
    """Render the prompt.md text for one benchmark from its Jinja2 template.

    The template used is ``prompt_<type>.md.j2`` from the requested template
    version, where ``<type>`` is ``config['benchmark']['type']``.

    Raises ``KeyError`` when a required field is absent from ``config`` and
    ``ValueError`` when no template exists for the benchmark type.
    """
    benchmark_type = config['benchmark']['type']

    source = config['source']
    source_lang = source['language'].capitalize()
    source_desc = source['description']

    destination = config['destination']
    target_lang = destination['language'].capitalize()
    target_desc = destination['description']

    # Optional install/build commands become a bullet list, in that order.
    install_cmd = destination.get('install_cmd')
    build_cmd = destination.get('build_cmd')
    candidate_steps = (
        (install_cmd, f"Install dependencies: `{install_cmd}`"),
        (build_cmd, f"Build: `{build_cmd}`"),
    )
    bullet_lines = [f"- {text}" for cmd, text in candidate_steps if cmd]
    if bullet_lines:
        build_instructions = "\n".join(bullet_lines)
    else:
        build_instructions = "No build steps required."

    # Pick the template that matches the benchmark type.
    env = get_jinja_env(template_version)
    template_name = f"prompt_{benchmark_type}.md.j2"
    try:
        template = env.get_template(template_name)
    except TemplateNotFound:
        raise ValueError(f"Unknown benchmark type: {benchmark_type} (template {template_name} not found)")

    context = dict(
        source_lang=source_lang,
        source_desc=source_desc,
        target_lang=target_lang,
        target_desc=target_desc,
        build_instructions=build_instructions,
        run_cmd=destination.get('run_cmd', ''),
    )
    # Only server templates receive the port environment variable name.
    if benchmark_type == 'server':
        context['port_env_var'] = destination.get('port_env_var', 'SERVER_PORT')

    return template.render(**context)
def main():
    """Command-line entry point: write workspace/prompt.md for each selected benchmark.

    Selection precedence: ``--benchmark ID`` (a single benchmark), then
    ``--all`` (everything discovered), then the ``benchmarks.ids`` list from
    the config file. With no list configured, every discovered benchmark is
    processed.

    A benchmark that fails to render is reported and skipped so the others
    are still generated; the process then exits with status 1. It also exits
    with status 1 when no benchmark is found or none matches the selection.
    """
    parser = argparse.ArgumentParser(description='Generate prompt.md files for MCode benchmarks')
    parser.add_argument('--config', type=str, default='config.toml',
                        help='Path to config file (default: config.toml)')
    parser.add_argument('--all', action='store_true',
                        help='Generate prompts for all benchmarks, ignoring config.toml filter')
    parser.add_argument('--benchmark', type=str,
                        help='Generate prompt for a specific benchmark ID only')
    parser.add_argument('--template-version', type=str, default=DEFAULT_TEMPLATE_VERSION,
                        help=f'Template version to use (default: {DEFAULT_TEMPLATE_VERSION})')
    args = parser.parse_args()

    # Setup paths
    root_dir = Path(__file__).parent
    dataset_dir = root_dir / "dataset"
    config_path = root_dir / args.config

    # Load config
    settings = load_config(config_path)
    benchmark_ids = settings.get('benchmarks', {}).get('ids', [])

    # Discover benchmarks
    all_benchmarks = discover_benchmarks(dataset_dir)
    if not all_benchmarks:
        print("No benchmarks found in dataset/")
        sys.exit(1)

    # Filter benchmarks
    if args.benchmark:
        # Single benchmark by ID
        if args.benchmark not in all_benchmarks:
            print(f"Benchmark ID '{args.benchmark}' not found")
            print(f"Available IDs: {list(all_benchmarks.keys())}")
            sys.exit(1)
        benchmarks = {args.benchmark: all_benchmarks[args.benchmark]}
    elif not args.all and benchmark_ids:
        # Filter by config.toml
        benchmarks = {k: v for k, v in all_benchmarks.items() if k in benchmark_ids}
        if not benchmarks:
            print(f"No benchmarks found matching IDs in config: {benchmark_ids}")
            # Reuse the unfiltered result rather than rescanning the dataset.
            print(f"Available IDs: {list(all_benchmarks.keys())}")
            sys.exit(1)
    else:
        benchmarks = all_benchmarks

    # Sort by ID
    sorted_benchmarks = sorted(benchmarks.items(), key=lambda x: x[0])

    print(f"Generating prompts for {len(sorted_benchmarks)} benchmark(s)...")
    print(f"Using template version: {args.template_version}")
    print("=" * 60)

    failures = 0
    # Named bench_config so the loaded TOML settings are not shadowed.
    for benchmark_id, bench_config in sorted_benchmarks:
        benchmark_dir = bench_config['_dir']
        benchmark_info = bench_config['benchmark']
        # The name is only used for display, so a missing one must not
        # abort the run; fall back to the benchmark's directory name.
        benchmark_name = benchmark_info.get('name', benchmark_dir.name)
        benchmark_type = benchmark_info['type']
        print(f"[{benchmark_id}] {benchmark_name} ({benchmark_type})")
        try:
            prompt = generate_prompt(bench_config, args.template_version)
            prompt_path = benchmark_dir / "workspace" / "prompt.md"
            # Ensure workspace directory exists
            prompt_path.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 so output does not depend on the platform locale.
            prompt_path.write_text(prompt, encoding="utf-8")
            print(f" -> Generated {prompt_path.relative_to(root_dir)}")
        except KeyError as e:
            failures += 1
            print(f" -> ERROR: Missing required field {e}")
        except Exception as e:
            # Top-level boundary: report and continue with the next benchmark.
            failures += 1
            print(f" -> ERROR: {e}")

    print("=" * 60)
    if failures:
        print(f"Done with {failures} error(s).")
        sys.exit(1)
    print("Done.")


if __name__ == "__main__":
    main()