Skip to content

Commit 500c6b8

Browse files
jimisola and Jimisola Laursen authored
perf: replace linear dict scans with O(1) lookups and cache Lark parser (#302)
* perf: replace linear dict scans with O(1) lookups and cache Lark parser
  - Refactor _get_urn_ids_for_svcs() to use dict.get() instead of iterating .items()
  - Refactor _get_nr_of_impls_for_req() to use len(dict.get()) to correctly count multiple annotations per requirement
  - Refactor _get_mvr_ids_for_req() to use dict.get() instead of nested iteration over .items()
  - Cache the Lark parser as a class attribute in GenericELTransformer to avoid recreating it on every filter call
  - Add unit tests for the refactored statistics_generator methods (TDD)
  Signed-off-by: jimisola <jimisola@jimisola.com>
* docs: add regression smoke testing instructions to CLAUDE.md
  Signed-off-by: jimisola <jimisola@jimisola.com>
* chore: remove copyright notice from new test file
  Signed-off-by: jimisola <jimisola@jimisola.com>
* refactor: move _parser class attribute to top of GenericELTransformer
  Signed-off-by: jimisola <jimisola@jimisola.com>
---------
Signed-off-by: jimisola <jimisola@jimisola.com>
Co-authored-by: Jimisola Laursen <jimisola.laursen@resurs.se>
1 parent dcd00fa commit 500c6b8

4 files changed

Lines changed: 138 additions & 26 deletions

File tree

CLAUDE.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,37 @@ hatch run python src/reqstool/command.py export local -p ../reqstool-demo/docs/r
102102
hatch run python src/reqstool/command.py report --format markdown local -p ../reqstool-demo/docs/reqstool -o /tmp/report.md
103103
```
104104

105+
## Regression Smoke Testing
106+
107+
**Before creating a PR**, capture CLI output from `main` (or the parent branch) and compare it against the feature branch to verify no unintended changes. Strip ANSI codes so diffs are clean.
108+
109+
Run against **both** the in-repo test fixtures and `reqstool-demo`:
110+
111+
```bash
112+
# Capture baseline output on main/parent branch
113+
git checkout main
114+
hatch run python src/reqstool/command.py status local -p tests/resources/test_data/data/local/test_standard/baseline/ms-001 2>&1 | sed 's/\x1b\[[0-9;]*m//g' > /tmp/baseline-status-std.txt
115+
hatch run python src/reqstool/command.py report --format asciidoc local -p tests/resources/test_data/data/local/test_standard/baseline/ms-001 > /tmp/baseline-report-std.txt 2>&1
116+
hatch run python src/reqstool/command.py status local -p tests/resources/test_data/data/local/test_basic/baseline/ms-101 2>&1 | sed 's/\x1b\[[0-9;]*m//g' > /tmp/baseline-status-basic.txt
117+
hatch run python src/reqstool/command.py report --format asciidoc local -p tests/resources/test_data/data/local/test_basic/baseline/ms-101 > /tmp/baseline-report-basic.txt 2>&1
118+
hatch run python src/reqstool/command.py status local -p ../reqstool-demo/docs/reqstool 2>&1 | sed 's/\x1b\[[0-9;]*m//g' > /tmp/baseline-status-demo.txt
119+
hatch run python src/reqstool/command.py report --format asciidoc local -p ../reqstool-demo/docs/reqstool > /tmp/baseline-report-demo.txt 2>&1
120+
121+
# Switch to feature branch and capture output
122+
git checkout <feature-branch>
123+
# (same commands, writing to /tmp/feature-*.txt)
124+
125+
# Diff — must be identical unless the change intentionally alters output
126+
diff /tmp/baseline-status-std.txt /tmp/feature-status-std.txt
127+
diff /tmp/baseline-report-std.txt /tmp/feature-report-std.txt
128+
diff /tmp/baseline-status-basic.txt /tmp/feature-status-basic.txt
129+
diff /tmp/baseline-report-basic.txt /tmp/feature-report-basic.txt
130+
diff /tmp/baseline-status-demo.txt /tmp/feature-status-demo.txt
131+
diff /tmp/baseline-report-demo.txt /tmp/feature-report-demo.txt
132+
```
133+
134+
If a diff is expected (e.g. the PR intentionally changes output), note it in the PR description.
135+
105136
## Key Conventions
106137

107138
- **URN format**: `some:urn:string` — the separator is `:`. `UrnId` is the canonical composite key used throughout indexes.

src/reqstool/commands/status/statistics_generator.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,7 @@ def _get_combined_index_data(
6060
def _calculate(self, cid: CombinedIndexedDataset) -> StatisticsContainer:
6161
for urn_id in cid.requirements.keys():
6262
# Get all svc UrnIds related to current requirement
63-
svcs_urn_ids: List[UrnId] = self._get_urn_ids_for_svcs(
64-
urn_id=urn_id, svcs_from_req=cid.svcs_from_req.items()
65-
)
63+
svcs_urn_ids: List[UrnId] = self._get_urn_ids_for_svcs(urn_id=urn_id, svcs_from_req=cid.svcs_from_req)
6664

6765
# Get svcs for current requirement
6866
svcs: List[SVCData] = [cid.svcs[urn_id] for urn_id in svcs_urn_ids]
@@ -116,12 +114,7 @@ def _calculate(self, cid: CombinedIndexedDataset) -> StatisticsContainer:
116114
return self.stats_container
117115

118116
def _get_urn_ids_for_svcs(self, urn_id: UrnId, svcs_from_req: Dict[UrnId, List[UrnId]]) -> List[UrnId]:
119-
svcs_urn_ids: List[UrnId] = []
120-
for req_urn_id, svc_list in svcs_from_req:
121-
if urn_id == req_urn_id:
122-
for svc in svc_list:
123-
svcs_urn_ids.append(svc)
124-
return svcs_urn_ids
117+
return list(svcs_from_req.get(urn_id, []))
125118

126119
def _check_implementation(self, urn_id: UrnId, nr_of_implementations: int) -> bool:
127120
implementation = self.cid.requirements[urn_id].implementation
@@ -192,13 +185,8 @@ def _req_verification_equals(self, svcs: List[SVCData], verification: Sequence[V
192185
return True
193186
return False
194187

195-
# Get the nr of impls for current requirement
196188
def _get_nr_of_impls_for_req(self, urn_id: UrnId) -> int:
197-
nr_of_implementations = 0
198-
for annotation_id in self.cid.annotations_impls.keys():
199-
if annotation_id == urn_id:
200-
nr_of_implementations += 1
201-
return nr_of_implementations
189+
return len(self.cid.annotations_impls.get(urn_id, []))
202190

203191
def _get_annotated_automated_test_results_for_req(
204192
self,
@@ -231,13 +219,7 @@ def _get_automated_test_results_for_req(
231219
return test_results
232220

233221
def _get_mvr_ids_for_req(self, svcs_urn_ids: List[UrnId]) -> List[UrnId]:
234-
mvr_ids: List[UrnId] = []
235-
for svc_urn_id in svcs_urn_ids:
236-
for id, value in self.cid.mvrs_from_svc.items():
237-
if id == svc_urn_id:
238-
for urn in value:
239-
mvr_ids.append(urn)
240-
return mvr_ids
222+
return [urn for svc_urn_id in svcs_urn_ids for urn in self.cid.mvrs_from_svc.get(svc_urn_id, [])]
241223

242224
def _get_mvrs_for_req(self, mvrs: Dict[UrnId, MVRData], mvr_ids: List[UrnId]) -> List[MVRData] | None:
243225
return [mvrs[mvr_id] for mvr_id in mvr_ids] if mvr_ids else None

src/reqstool/expression_languages/generic_el.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class GenericELTransformer(
1616
):
1717
_data: T
1818
_urn: str
19+
_parser: Lark = None
1920

2021
_GRAMMAR = """
2122
start: expr
@@ -99,8 +100,7 @@ def STRING(self, token) -> str:
99100

100101
@staticmethod
101102
def parse_el(expression_language: str) -> ParseTree:
102-
parser = Lark(GenericELTransformer._GRAMMAR, parser="lalr")
103+
if GenericELTransformer._parser is None:
104+
GenericELTransformer._parser = Lark(GenericELTransformer._GRAMMAR, parser="lalr")
103105

104-
tree = parser.parse(expression_language)
105-
106-
return tree
106+
return GenericELTransformer._parser.parse(expression_language)
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from unittest.mock import MagicMock
2+
3+
from reqstool.commands.status.statistics_generator import StatisticsGenerator
4+
from reqstool.common.dataclasses.urn_id import UrnId
5+
6+
7+
def _make_generator():
8+
"""Create a StatisticsGenerator bypassing __init__."""
9+
gen = object.__new__(StatisticsGenerator)
10+
gen.cid = MagicMock()
11+
return gen
12+
13+
14+
# ---------------------------------------------------------------------------
15+
# _get_urn_ids_for_svcs
16+
# ---------------------------------------------------------------------------
17+
18+
REQ_1 = UrnId(urn="urn", id="REQ_001")
19+
REQ_2 = UrnId(urn="urn", id="REQ_002")
20+
SVC_1 = UrnId(urn="urn", id="SVC_001")
21+
SVC_2 = UrnId(urn="urn", id="SVC_002")
22+
SVC_3 = UrnId(urn="urn", id="SVC_003")
23+
MVR_1 = UrnId(urn="urn", id="MVR_001")
24+
MVR_2 = UrnId(urn="urn", id="MVR_002")
25+
26+
27+
def test_get_urn_ids_for_svcs_returns_matching():
28+
gen = _make_generator()
29+
svcs_from_req = {REQ_1: [SVC_1, SVC_2], REQ_2: [SVC_3]}
30+
result = gen._get_urn_ids_for_svcs(urn_id=REQ_1, svcs_from_req=svcs_from_req)
31+
assert result == [SVC_1, SVC_2]
32+
33+
34+
def test_get_urn_ids_for_svcs_returns_empty_for_missing_key():
35+
gen = _make_generator()
36+
svcs_from_req = {REQ_2: [SVC_3]}
37+
result = gen._get_urn_ids_for_svcs(urn_id=REQ_1, svcs_from_req=svcs_from_req)
38+
assert result == []
39+
40+
41+
def test_get_urn_ids_for_svcs_empty_dict():
42+
gen = _make_generator()
43+
result = gen._get_urn_ids_for_svcs(urn_id=REQ_1, svcs_from_req={})
44+
assert result == []
45+
46+
47+
# ---------------------------------------------------------------------------
48+
# _get_nr_of_impls_for_req
49+
# ---------------------------------------------------------------------------
50+
51+
52+
def test_get_nr_of_impls_for_req_single():
53+
gen = _make_generator()
54+
gen.cid.annotations_impls = {REQ_1: ["annotation_1"]}
55+
assert gen._get_nr_of_impls_for_req(urn_id=REQ_1) == 1
56+
57+
58+
def test_get_nr_of_impls_for_req_multiple():
59+
gen = _make_generator()
60+
gen.cid.annotations_impls = {REQ_1: ["annotation_1", "annotation_2"]}
61+
assert gen._get_nr_of_impls_for_req(urn_id=REQ_1) == 2
62+
63+
64+
def test_get_nr_of_impls_for_req_missing():
65+
gen = _make_generator()
66+
gen.cid.annotations_impls = {REQ_2: ["some_annotation"]}
67+
assert gen._get_nr_of_impls_for_req(urn_id=REQ_1) == 0
68+
69+
70+
def test_get_nr_of_impls_for_req_empty():
71+
gen = _make_generator()
72+
gen.cid.annotations_impls = {}
73+
assert gen._get_nr_of_impls_for_req(urn_id=REQ_1) == 0
74+
75+
76+
# ---------------------------------------------------------------------------
77+
# _get_mvr_ids_for_req
78+
# ---------------------------------------------------------------------------
79+
80+
81+
def test_get_mvr_ids_for_req_returns_matching():
82+
gen = _make_generator()
83+
gen.cid.mvrs_from_svc = {SVC_1: [MVR_1], SVC_2: [MVR_2]}
84+
result = gen._get_mvr_ids_for_req(svcs_urn_ids=[SVC_1, SVC_2])
85+
assert result == [MVR_1, MVR_2]
86+
87+
88+
def test_get_mvr_ids_for_req_missing_svc():
89+
gen = _make_generator()
90+
gen.cid.mvrs_from_svc = {SVC_2: [MVR_2]}
91+
result = gen._get_mvr_ids_for_req(svcs_urn_ids=[SVC_1])
92+
assert result == []
93+
94+
95+
def test_get_mvr_ids_for_req_empty():
96+
gen = _make_generator()
97+
gen.cid.mvrs_from_svc = {}
98+
result = gen._get_mvr_ids_for_req(svcs_urn_ids=[])
99+
assert result == []

0 commit comments

Comments
 (0)