diff --git a/CHANGELOG.md b/CHANGELOG.md
index 222fdaa..0721255 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   becwright" error — instead of risking a silent misparse. (The `.bec.yaml` export
   bundle was already versioned via `becwright_bec`.)
 
+### Fixed
+- `becwright init --from-claude-md` no longer misreads a per-*function* line count
+  as a per-*file* cap. A phrase like "~50 lines per function, ~800 per file" used
+  to derive `max_lines --max 50` (flagging nearly every file); the file-cap
+  matcher now refuses to bridge across a comma or another number, so an ambiguous
+  soft guideline derives no cap instead of a wrong one. (Surfaced field-testing a
+  real Python repo.)
+
 ### Documentation
 - Documented becwright's **stable contract** in `documentation/usage.md`: the
   CLI exit codes (`0` pass · `1` a blocking rule failed · `2` config/usage problem)
diff --git a/src/becwright/cli.py b/src/becwright/cli.py
index 2be2e65..359d2c3 100644
--- a/src/becwright/cli.py
+++ b/src/becwright/cli.py
@@ -417,10 +417,13 @@ def _read_claude_md(root: Path) -> str | None:
 
 # A line cap tied to *files* (not functions — that needs an AST): "files < 800
 # lines" or "800 lines per file", EN/ES. The file/module anchor avoids mapping a
-# "functions < 50 lines" rule, which becwright cannot enforce.
+# "functions < 50 lines" rule, which becwright cannot enforce. The gap between the
+# number and the file word excludes commas and digits so a match cannot bridge
+# across a clause boundary — e.g. "~50 lines per function, ~800 per file" must not
+# read the function's 50 as the file cap just because "file" appears later.
 _FILE_LINE_CAP = re.compile(
-    r"(?:files?|archivos?|modules?|m[óo]dulos?)\b[^.\n]{0,40}?(\d{2,4})\s*(?:lines?|l[ií]neas?)"
-    r"|(\d{2,4})\s*(?:lines?|l[ií]neas?)\b[^.\n]{0,25}?(?:per\s+|por\s+)?(?:files?|archivos?|modules?|m[óo]dulos?)",
+    r"(?:files?|archivos?|modules?|m[óo]dulos?)\b[^.,\n\d]{0,40}?(\d{2,4})\s*(?:lines?|l[ií]neas?)"
+    r"|(\d{2,4})\s*(?:lines?|l[ií]neas?)\b[^.,\n\d]{0,25}?(?:per\s+|por\s+)?(?:files?|archivos?|modules?|m[óo]dulos?)",
     re.IGNORECASE,
 )
 
diff --git a/tests/test_init.py b/tests/test_init.py
index 85f7ee9..3196f5e 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -320,6 +320,16 @@ def test_max_lines_cap_ignores_function_length():
     assert cli._max_lines_cap("Functions must be under 50 lines.") is None
 
 
+def test_max_lines_cap_does_not_bridge_function_cap_to_file_word():
+    # Real CLAUDE.md phrasing (bot-telegram field test): a per-function number and a
+    # per-file number in one clause, comma-joined. The function number (50) must NOT
+    # be captured just because "archivo"/"file" appears later past the comma. The file
+    # number (800) is elliptical (no "líneas" word of its own), so becwright declines
+    # rather than guessing wrong — an ambiguous soft guideline derives no cap.
+    assert cli._max_lines_cap("~50 líneas por función, ~800 por archivo") is None
+    assert cli._max_lines_cap("~50 lines per function, ~800 per file") is None
+
+
 def test_max_lines_cap_out_of_range():
     assert cli._max_lines_cap("keep files under 40 lines") is None  # below 50
     assert cli._max_lines_cap("files must not exceed 9000 lines") is None  # above 5000