Skip to content

Commit 2a324ca

Browse files
committed
chore: squash branch changes on top of main; adopt Ruff+Pyright; update CI; exclude vite dist; restore deleted files from main (bigquery adapter + vite src/readme)
1 parent 766e47c commit 2a324ca

File tree

190 files changed, with 2263 additions (+2263) and 3411 deletions (-3411).

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

190 files changed, with 2263 additions (+2263) and 3411 deletions (-3411).

.flake8

Lines changed: 0 additions & 3 deletions
This file was deleted.

.github/workflows/ci.yml

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,11 +42,14 @@ jobs:
4242
- name: Install tau2 for testing
4343
run: uv pip install git+https://github.com/sierra-research/tau2-bench.git@main
4444

45-
- name: Lint with flake8
46-
run: uv run flake8 eval_protocol tests examples scripts --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
45+
- name: Ruff format (check)
46+
run: uv run ruff format --check .
4747

48-
- name: Type check with mypy
49-
run: uv run mypy eval_protocol
48+
- name: Ruff lint
49+
run: uv run ruff check .
50+
51+
- name: Type check with pyright
52+
run: uv run pyright
5053

5154
test-core:
5255
name: Core Tests (Python ${{ matrix.python-version }})

.pre-commit-config.yaml

Lines changed: 13 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,29 @@
11
# See https://pre-commit.com for more information
22
# See https://pre-commit.com/hooks.html for more hooks
3+
exclude: |
4+
(^vite-app/|\.snap$)
35
repos:
46
- repo: https://github.com/pre-commit/pre-commit-hooks
5-
rev: v5.0.0
7+
rev: v6.0.0
68
hooks:
79
- id: trailing-whitespace
10+
exclude: "(^vite-app/|\\.snap$)"
811
- id: end-of-file-fixer
12+
exclude: "(^vite-app/|\\.snap$)"
913
- id: check-yaml
1014
- id: check-added-large-files
1115
- id: check-merge-conflict
1216
- id: check-toml
1317
- id: detect-private-key
1418

15-
- repo: https://github.com/psf/black
16-
rev: 25.1.0
19+
- repo: https://github.com/astral-sh/ruff-pre-commit
20+
rev: v0.12.8
1721
hooks:
18-
- id: black
19-
args: [--line-length=119]
22+
- id: ruff-format
23+
- id: ruff
24+
args: ["--fix"]
2025

21-
- repo: https://github.com/pycqa/isort
22-
rev: 6.0.1
26+
- repo: https://github.com/RobertCraigie/pyright-python
27+
rev: v1.1.403
2328
hooks:
24-
- id: isort
25-
name: isort (python)
26-
args: ["--profile", "black", "--line-length", "119", "--filter-files"]
27-
28-
- repo: https://github.com/pycqa/flake8
29-
rev: 7.3.0
30-
hooks:
31-
- id: flake8
32-
args: [--max-line-length=119, --max-complexity=100, "--ignore=E402,F401,F541,W503,E203,F811,E226,F841,E704,E713,E712,E231,E731,E501"]
33-
# additional_dependencies: [flake8-docstrings, flake8-import-order] # Optional: add flake8 plugins
34-
35-
- repo: https://github.com/pre-commit/mirrors-mypy
36-
rev: v1.17.0
37-
hooks:
38-
- id: mypy
39-
args: [--ignore-missing-imports, --install-types, --non-interactive]
40-
additional_dependencies:
41-
- types-requests
42-
- types-setuptools
43-
# Add other types-* packages your project uses
29+
- id: pyright

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1818
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21-
SOFTWARE.
21+
SOFTWARE.

development/normalize_sandbox_fusion.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@
5656
try:
5757
repobench_p_tokenizer = AutoTokenizer.from_pretrained("gpt2")
5858
except OSError:
59-
print("Warning: Could not load gpt2 tokenizer for Repobench-P. " "Falling back to basic split for token counting.")
59+
print("Warning: Could not load gpt2 tokenizer for Repobench-P. Falling back to basic split for token counting.")
6060
repobench_p_tokenizer = None
6161

6262

@@ -108,8 +108,7 @@ def format_aider_prompt(problem_json: dict) -> str:
108108
"""Format the prompt for Aider benchmark style problems."""
109109
question = problem_json.get("content", "")
110110
return (
111-
f"{question}\n\nPlease generate the code in the following format:\n"
112-
"```python\n# Your code response here\n```"
111+
f"{question}\n\nPlease generate the code in the following format:\n```python\n# Your code response here\n```"
113112
)
114113

115114

@@ -327,7 +326,7 @@ def normalize_problem_to_openai_format(
327326
try:
328327
labels = json.loads(labels_data)
329328
except json.JSONDecodeError:
330-
print(f"Warning: Skipping ID {problem_id_str} in {filename} " "- malformed JSON in labels.")
329+
print(f"Warning: Skipping ID {problem_id_str} in {filename} - malformed JSON in labels.")
331330
return None
332331
elif isinstance(labels_data, dict):
333332
labels = labels_data
@@ -426,10 +425,10 @@ def normalize_problem_to_openai_format(
426425
)
427426
return None
428427
if not final_user_content.strip() or not final_assistant_content.strip():
429-
print(f"Warning: Skipping ID {problem_id_str} in {filename} - " "empty processed content.")
428+
print(f"Warning: Skipping ID {problem_id_str} in {filename} - empty processed content.")
430429
return None
431430
if final_assistant_content.strip() == "import sys; sys.exit(0)":
432-
print(f"Warning: Skipping ID {problem_id_str} in {filename} - " "placeholder solution.")
431+
print(f"Warning: Skipping ID {problem_id_str} in {filename} - placeholder solution.")
433432
return None
434433

435434
return {
@@ -439,7 +438,7 @@ def normalize_problem_to_openai_format(
439438
]
440439
}
441440
except Exception as e:
442-
print(f"Warning: Skipping ID {problem_id_str} in {filename} - " f"error ({type(e).__name__}: {e}).")
441+
print(f"Warning: Skipping ID {problem_id_str} in {filename} - error ({type(e).__name__}: {e}).")
443442
import traceback
444443

445444
traceback.print_exc()
@@ -474,7 +473,7 @@ def main():
474473
file_error_count += 1
475474
continue
476475

477-
print(f"Processing file {filename_idx + 1}/{len(ALL_SOURCE_JSONL_FILES)}: " f"{filename}...")
476+
print(f"Processing file {filename_idx + 1}/{len(ALL_SOURCE_JSONL_FILES)}: {filename}...")
478477
lines_in_file = 0
479478
processed_in_file = 0
480479
skipped_in_file = 0
@@ -488,7 +487,7 @@ def main():
488487
try:
489488
problem_data = json.loads(stripped_line)
490489
except json.JSONDecodeError:
491-
print(f"Warning: Malformed JSON on line {line_number} " f"in {filepath}. Skipping line.")
490+
print(f"Warning: Malformed JSON on line {line_number} in {filepath}. Skipping line.")
492491
skipped_in_file += 1
493492
continue
494493

@@ -507,7 +506,7 @@ def main():
507506
processed_count += processed_in_file
508507
skipped_count += skipped_in_file
509508
except Exception as e:
510-
print(f"Error processing file {filepath}: {type(e).__name__}: {e}. " "Skipping rest of file.")
509+
print(f"Error processing file {filepath}: {type(e).__name__}: {e}. Skipping rest of file.")
511510
import traceback
512511

513512
traceback.print_exc()

development/notes/pytest_integration_proposal.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ def tau2_rollout_processor(row: EvaluationRow, model: str, input_params: Dict, *
115115
# from the dataset and provide a simulated tool response.
116116
# 4. Call the model again with the tool response.
117117
# 5. Construct a final EvaluationRow with the full transcript.
118-
118+
119119
# The logic is encapsulated here, away from the test definition.
120120
processed_row = ep.default_rollout_processor(row, model, input_params)[0] # Simplified for example
121121
return [processed_row]
@@ -186,11 +186,11 @@ def best_of_n_processor(row: EvaluationRow, model: str, input_params: Dict, **kw
186186

187187
# Then, apply a reward function to score each candidate.
188188
scored_rows = ep.evaluate(candidate_rows, score_politeness)
189-
189+
190190
# Finally, select the best row.
191191
# This logic could be encapsulated in a helper, e.g., ep.select_best().
192192
best_row = select_best_by_group(scored_rows, score_key='politeness')
193-
193+
194194
return [best_row]
195195

196196
@evaluation_test(

development/utils/subprocess_manager.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ def start_ngrok_and_get_url(
139139
# Or by setting NGROK_AUTHTOKEN environment variable.
140140
# Forcing it via command line is also an option but less common for persistent setup.
141141
print(
142-
f"Note: Ngrok authtoken should be pre-configured by the user (e.g., 'ngrok config add-authtoken <token>') or via NGROK_AUTHTOKEN env var."
142+
"Note: Ngrok authtoken should be pre-configured by the user (e.g., 'ngrok config add-authtoken <token>') or via NGROK_AUTHTOKEN env var."
143143
)
144144
# Example if passing via env for the subprocess:
145145
# ngrok_env = os.environ.copy()

0 commit comments

Comments
 (0)