Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c737c77
relational DSL: add AST + query normalization utilities
AlexanderOnischenko Jan 19, 2026
972c1a8
базовая нормализация (пока с ошибкой - невалидный RelationalQuery
AlexanderOnischenko Jan 19, 2026
1aacdb0
регрессионнные тесты на нормализатор
AlexanderOnischenko Jan 19, 2026
361f486
ignore .DS_Store files
AlexanderOnischenko Jan 19, 2026
d1df952
ignore .DS_Store files
AlexanderOnischenko Jan 19, 2026
b82a03a
дополнительные реггрессионные кейсы
AlexanderOnischenko Jan 19, 2026
150e000
багфикс нормализации селекторов
AlexanderOnischenko Jan 20, 2026
f4ac02d
добавил команду удаления тэгов
AlexanderOnischenko Jan 20, 2026
de1457d
добавил команду удаления тэгов
AlexanderOnischenko Jan 20, 2026
9347eb6
version increase on make scripit
AlexanderOnischenko Jan 20, 2026
19a35b3
перенос кода нормализации в один класс пайплайна
AlexanderOnischenko Jan 21, 2026
b0068b0
Тесты покрывают PlanNormalizer
AlexanderOnischenko Jan 21, 2026
8e412e1
уменьшена публичная поверхность PlanNormalizer
AlexanderOnischenko Jan 21, 2026
a195963
Revert "relational DSL: add AST + query normalization utilities"
AlexanderOnischenko Jan 21, 2026
440c3f5
убрал dsl (не та фича-ветка)
AlexanderOnischenko Jan 21, 2026
563eb2a
ruff fix (formatting)
AlexanderOnischenko Jan 21, 2026
3d6ab8f
нормализуем провайдера так же как и в других местах
AlexanderOnischenko Jan 21, 2026
b22e6ed
реорганизация папок тестов
AlexanderOnischenko Jan 21, 2026
f57afce
нонвые плохие кейсы на исправление
AlexanderOnischenko Jan 21, 2026
587196f
PlanNormalizer нормализует в одной точке, pyright fixes
AlexanderOnischenko Jan 21, 2026
085f9c3
Stop PlanNormalizer synthesizing required_context; always merge via s…
AlexanderOnischenko Jan 21, 2026
f1ce5f5
дефолты
AlexanderOnischenko Jan 21, 2026
c101a10
Make expected checks case-insensitive
AlexanderOnischenko Jan 21, 2026
d89e625
Merge pull request #104 from AlexanderOnischenko/feature/normalization
AlexanderOnischenko Jan 22, 2026
d112d74
TDD тест-кейсы для нормализации
AlexanderOnischenko Jan 22, 2026
a13283c
Merge pull request #106 from AlexanderOnischenko/codex/update-case-in…
AlexanderOnischenko Jan 22, 2026
e3cb6bc
Add pytest workflow for pull requests
AlexanderOnischenko Jan 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .demo_qa.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Локальные настройки demo_qa (генерируется командой: make init)
# Можно редактировать руками. Рекомендуется добавить в .gitignore.
DATA=_demo_data/shop
SCHEMA=_demo_data/shop/schema.yaml
CASES=examples/demo_qa/cases/retail_cases.json
# OUT можно не задавать: по умолчанию OUT=${DATA}/.runs/results.jsonl
# OUT=_demo_data/shop/.runs/results.jsonl
24 changes: 24 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CI workflow: runs the pytest suite for pull requests, leaving out tests
# marked `slow` or `known_bad`.
name: pytest

on:
  pull_request:

jobs:
  pytest:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string rather than a YAML float.
          python-version: "3.11"

      - name: Install dependencies
        # Folded scalar: the two lines are joined into one `a && b` command.
        run: >-
          python -m pip install --upgrade pip &&
          python -m pip install -e ".[dev]"

      - name: Run pytest (not slow, not known_bad)
        env:
          PYTHONPATH: ".:src"
        run: python -m pytest -q -m "not slow and not known_bad"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ build/
.env.demo_qa
_demo_data/*/.runs/*
.coverage
.DS_Store
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ repos:
name: pytest
language: system
pass_filenames: false
entry: bash -lc 'source .venv/bin/activate PYTHONPATH=".:src:${PYTHONPATH}"; python -m pytest -q -m "not slow"'
# PYTHONPATH must be an assignment prefix of the python command. Written after
# `source <file>` it is only a positional argument of the sourced script and
# never reaches pytest's environment.
entry: bash -lc 'source .venv/bin/activate; PYTHONPATH=".:src:${PYTHONPATH}" python -m pytest -q -m "not slow and not known_bad"'
26 changes: 25 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ COMPARE_TAG_JUNIT ?= $(DATA)/.runs/diff.tags.junit.xml

MAX_FAILS ?= 5

PURGE_RUNS ?= 0
PRUNE_HISTORY ?= 0
PRUNE_CASE_HISTORY ?= 0
DRY ?= 0

# ==============================================================================
# 6) Настройки LLM-конфига (редактирование/просмотр)
# ==============================================================================
Expand Down Expand Up @@ -94,7 +99,7 @@ LIMIT_FLAG := $(if $(strip $(LIMIT)),--limit $(LIMIT),)
batch batch-tag batch-failed batch-failed-from \
batch-missed batch-missed-from batch-failed-tag batch-missed-tag \
batch-fail-fast batch-max-fails \
stats history-case report-tag report-tag-changes tags case-run case-open compare compare-tag
stats history-case report-tag report-tag-changes tags tag-rm case-run case-open compare compare-tag

# ==============================================================================
# help (на русском)
Expand Down Expand Up @@ -142,6 +147,14 @@ help:
@echo " make case-run CASE=case_42 - прогнать один кейс"
@echo " make case-open CASE=case_42 - открыть артефакты кейса"
@echo ""
@echo "Уборка:"
@echo " make tag-rm TAG=... [DRY=1] [PURGE_RUNS=1] [PRUNE_HISTORY=1] [PRUNE_CASE_HISTORY=1]"
@echo " - удаляет effective snapshot тега и tag-latest* указатели"
@echo " DRY=1 - dry-run: только показать, что будет удалено"
@echo " PURGE_RUNS=1 - дополнительно удалить все runs, где run_meta.tag == TAG"
@echo " PRUNE_HISTORY=1 - вычистить записи с этим тегом из $${DATA}/.runs/history.jsonl"
@echo " PRUNE_CASE_HISTORY=1 - вычистить записи с этим тегом из $${DATA}/.runs/runs/cases/*.jsonl"
@echo ""
@echo "Сравнение результатов:"
@echo " make compare BASE=... NEW=... [DIFF_OUT=...] [JUNIT=...]"
@echo " make compare-tag BASE_TAG=baseline NEW_TAG=... [COMPARE_TAG_OUT=...] [COMPARE_TAG_JUNIT=...]"
Expand Down Expand Up @@ -340,3 +353,14 @@ compare-tag: check
--new-tag "$(NEW_TAG)" \
--out "$(OUT)" \
--junit "$(JUNIT)"

# команды очистки

# tag-rm: runs the scripts.tag_rm module for the given TAG.
# TAG is mandatory; TAG, DATA, PURGE_RUNS, PRUNE_HISTORY, PRUNE_CASE_HISTORY and
# DRY are handed to the module through its environment.
tag-rm:
	@if [ -z "$(strip $(TAG))" ]; then echo "TAG обязателен: make tag-rm TAG=..."; exit 1; fi
	@TAG="$(TAG)" \
	DATA="$(DATA)" \
	PURGE_RUNS="$(PURGE_RUNS)" \
	PRUNE_HISTORY="$(PRUNE_HISTORY)" \
	PRUNE_CASE_HISTORY="$(PRUNE_CASE_HISTORY)" \
	DRY="$(DRY)" \
	$(PYTHON) -m scripts.tag_rm





87 changes: 87 additions & 0 deletions caffeinate_make.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/bin/sh
# Periodic runner: executes FIRST_CMD once after DELAY seconds, then REPEAT_CMD
# every INTERVAL seconds, logging to $LOG.
# Fail on use of unset variables.
set -u

### ================== SETTINGS (edit here) ==================
DELAY=0 # seconds before the first run (0 = start immediately)
INTERVAL=5400 # seconds between runs (90 minutes)
TICK=300 # print the countdown every 300 seconds (5 minutes)

# (optional) project directory in which make should be executed
WORKDIR=/Users/alexanderonishchenko/Documents/_Projects/fetchgraph

# Log file; everything printed through log() and every command's output is appended here
LOG="$HOME/batch_tag.log"

# Command for the FIRST run
FIRST_CMD='make batch-tag TAG=my_tag NOTE="прогон перед мерджем"'

# Command for the REPEATED runs
REPEAT_CMD='make batch-tag TAG=my_tag NOTE="прогон перед мерджем"'
### ============================================================

# Directory used as a lock: the script refuses to start if it cannot create it
LOCKDIR="/tmp/batch_tag_runner.lock"

# log ARGS...: print the arguments as a single line to stdout and append the
# same line to $LOG.
log() {
    printf '%s\n' "$*" | tee -a "$LOG"
}

# Becomes 1 only after this process has created $LOCKDIR. Without this flag a
# second copy that fails to take the lock would still run cleanup on exit and
# remove the lock owned by the copy that is already running.
LOCK_HELD=0

# cleanup: stop the background caffeinate helper (if one was started) and
# release the lock directory, but only when this process owns it.
cleanup() {
    [ -n "${CAF_PID:-}" ] && kill "$CAF_PID" 2>/dev/null || true
    if [ "$LOCK_HELD" -eq 1 ]; then
        rmdir "$LOCKDIR" 2>/dev/null || true
    fi
}
trap 'cleanup' EXIT
# A trapped signal does not end the shell by itself: after the handler returns
# the script would carry on (with the lock already gone). Exit explicitly with
# the conventional 128+signal status; the EXIT trap then performs the cleanup.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Guard against two copies running at once
if mkdir "$LOCKDIR" 2>/dev/null; then
    LOCK_HELD=1
else
    echo "Похоже, уже запущено (lock: $LOCKDIR). Если уверены — удалите lock и запустите снова." >&2
    exit 1
fi

log "PID $$ started at $(date '+%F %T')"

# Keep the Mac from sleeping while this script is running
if ! command -v caffeinate >/dev/null 2>&1; then
    log "WARNING: caffeinate не найден — Mac может уснуть."
else
    # Started in the background; its PID is kept so cleanup can stop it.
    caffeinate -dimsu -w $$ &
    CAF_PID=$!
    log "caffeinate pid: $CAF_PID"
fi

# Switch to the project directory when it exists; otherwise warn and stay put
if [ ! -d "$WORKDIR" ]; then
    log "WARNING: WORKDIR не существует: $WORKDIR (останусь в текущей папке)"
else
    cd "$WORKDIR" || exit 1
fi

# countdown SECONDS LABEL: wait SECONDS seconds in total, sleeping in chunks of
# at most $TICK seconds and logging the remaining time before each chunk.
# Exits the script with status 1 if sleep fails (e.g. it was interrupted).
countdown() {
    remaining="$1"
    caption="$2"

    while [ "$remaining" -gt 0 ]; do
        log "$caption: осталось $(( remaining / 60 ))m$(printf '%02d' "$(( remaining % 60 ))")s ($(date '+%F %T'))"

        # The last chunk may be shorter than a full tick
        if [ "$remaining" -lt "$TICK" ]; then
            chunk=$remaining
        else
            chunk=$TICK
        fi
        sleep "$chunk" || exit 1
        remaining=$(( remaining - chunk ))
    done
}

# run_cmd LABEL CMD: log a timestamped header and the command text, run CMD
# through `sh -c` with stdout and stderr both shown and appended to $LOG, then
# log an empty line. The command's own exit status is not propagated, because
# it is the left-hand side of a pipeline into tee.
run_cmd() {
    title="$1"
    command_line="$2"

    log "---- $title $(date '+%F %T') ----"
    log "CMD: $command_line"
    { sh -c "$command_line" 2>&1; } | tee -a "$LOG"
    log ""
}

# First run after the initial delay, then repeat forever every $INTERVAL seconds
countdown "$DELAY" "До первого запуска"
run_cmd "FIRST RUN" "$FIRST_CMD"

while true; do
    countdown "$INTERVAL" "До следующего запуска"
    run_cmd "REPEAT RUN" "$REPEAT_CMD"
done
2 changes: 1 addition & 1 deletion examples/demo_qa/demo_qa.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[llm]
base_url = "http://localhost:8000/v1"
base_url = "http://localhost:8002/v1"
plan_model = "default"
synth_model = "default"
plan_temperature = 0.0
Expand Down
20 changes: 18 additions & 2 deletions examples/demo_qa/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,14 @@ def _stringify(value: object | None) -> str | None:
return str(value)


def _normalize_text(value: str) -> str:
return value.strip().casefold()


def _normalize_strings(values: Iterable[object]) -> list[str]:
    """Convert every item to ``str`` and normalize it with ``_normalize_text``.

    The order of the items is preserved.
    """
    normalized: list[str] = []
    for item in values:
        normalized.append(_normalize_text(str(item)))
    return normalized


def _match_expected(case: Case, answer: str | None) -> ExpectedCheck | None:
if not case.has_asserts:
return None
Expand All @@ -218,7 +226,15 @@ def _match_expected(case: Case, answer: str | None) -> ExpectedCheck | None:
return ExpectedCheck(mode="none", expected=expected_value, passed=False, detail="no answer")
if case.expected is not None:
expected_str = _stringify(case.expected) or ""
passed = answer.strip() == expected_str.strip()
if isinstance(case.expected, (list, tuple, set)):
expected_items = _normalize_strings(case.expected)
answer_items = _normalize_strings(answer) if isinstance(answer, (list, tuple, set)) else []
if isinstance(case.expected, set) or isinstance(answer, set):
passed = set(expected_items) == set(answer_items)
else:
passed = expected_items == answer_items
else:
passed = _normalize_text(answer) == _normalize_text(expected_str)
detail = None if passed else f"expected={expected_str!r}, got={answer!r}"
return ExpectedCheck(mode="exact", expected=expected_str, passed=passed, detail=detail)
if case.expected_regex is not None:
Expand All @@ -229,7 +245,7 @@ def _match_expected(case: Case, answer: str | None) -> ExpectedCheck | None:
return ExpectedCheck(mode="regex", expected=expected_regex, passed=passed, detail=detail)
if case.expected_contains is not None:
expected_contains = _stringify(case.expected_contains) or ""
passed = expected_contains in answer
passed = _normalize_text(expected_contains) in _normalize_text(answer)
detail = None if passed else f"expected to contain {expected_contains!r}"
return ExpectedCheck(mode="contains", expected=expected_contains, passed=passed, detail=detail)
return None
Expand Down
22 changes: 21 additions & 1 deletion examples/demo_qa/tests/test_demo_qa_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_match_expected_coerces_non_string_expected_values() -> None:
def test_match_expected_contains_pass_and_fail() -> None:
case = Case(id="c2", question="Q", expected_contains="bar")

match = _match_expected(case, "value bar baz")
match = _match_expected(case, "value BAR baz")
assert match is not None
assert match.passed is True

Expand All @@ -47,6 +47,26 @@ def test_match_expected_contains_pass_and_fail() -> None:
assert missing_answer.detail == "no answer"


def test_match_expected_equals_is_case_insensitive() -> None:
    """An exact ``expected`` value matches an answer that differs only in letter case."""
    capitalized_case = Case(id="c3", question="Q", expected="Alpha")

    check = _match_expected(capitalized_case, "alpha")

    assert check is not None
    assert check.passed is True


def test_match_expected_list_comparison_normalizes_elements() -> None:
    """A list-valued ``expected`` is compared item by item after normalization."""
    list_case = Case(id="c4", question="Q", expected=["Foo", "Bar"])

    # cast() only satisfies the ``answer: str | None`` annotation; a real list is passed.
    same_items = _match_expected(list_case, cast(str, ["foo", "bar"]))
    assert same_items is not None
    assert same_items.passed is True

    different_items = _match_expected(list_case, cast(str, ["foo", "baz"]))
    assert different_items is not None
    assert different_items.passed is False


def test_diff_runs_tracks_regressions_and_improvements() -> None:
baseline = [
RunResult(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "fetchgraph"
version = "0.2.0"
version = "0.2.1"
description = "Graph-like planning → context fetching → synthesis agent (library-style)."
readme = "README.md"
requires-python = ">=3.11"
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ markers =
slow: slow tests (exclude via -m "not slow")
integration: integration tests (providers / IO / external deps)
e2e: end-to-end scenarios
known_bad: real-world TDD cases that are allowed to fail (excluded from CI by default)

# Удобные дефолты для логов в CI и локально
log_cli = true
Expand Down
Loading