From c63b7bb0a913c9f5763eec59827a401d846af292 Mon Sep 17 00:00:00 2001 From: Zo Bot Date: Wed, 20 May 2026 13:41:36 +0000 Subject: [PATCH 1/2] =?UTF-8?q?detect=20log=20level=20from=20the=20extract?= =?UTF-8?q?ed=20message=20in=20syslog=20parser=20=E2=80=94=20running=20det?= =?UTF-8?q?ect=5Flog=5Flevel=20on=20the=20full=20line=20causes=20false=20p?= =?UTF-8?q?ositives=20when=20host=20or=20process=20names=20contain=20words?= =?UTF-8?q?=20like=20'error'=20or=20'warn';=20now=20it=20runs=20on=20the?= =?UTF-8?q?=20message=20portion=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/log_analyzer_cli/parsers/syslog.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log_analyzer_cli/parsers/syslog.py b/src/log_analyzer_cli/parsers/syslog.py index 99891f3..50f2243 100644 --- a/src/log_analyzer_cli/parsers/syslog.py +++ b/src/log_analyzer_cli/parsers/syslog.py @@ -85,7 +85,6 @@ def parse(self, line: str) -> Optional[ParsedEntry]: groups = match.groupdict() timestamp = self._parse_timestamp(groups.get("timestamp", "")) - level = detect_log_level(line) # Check full line for level metadata = {} if groups.get("host"): @@ -94,11 +93,12 @@ def parse(self, line: str) -> Optional[ParsedEntry]: metadata["process"] = groups["process"] source = groups["process"] else: - # Use host as source when no process name source = groups.get("host") if groups.get("pid"): metadata["pid"] = groups["pid"] + level = detect_log_level(groups.get("message", "")) + return ParsedEntry( raw=line, timestamp=timestamp, From f817ad424aca446214f9c06379122bed4fb6ed0e Mon Sep 17 00:00:00 2001 From: Zo Bot Date: Sat, 20 Jun 2026 19:27:09 +0000 Subject: [PATCH 2/2] reset error groups at the start of each LogAnalyzer.analyze call --- src/log_analyzer_cli/analyzer.py | 7 +++++++ tests/test_analyzer.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/log_analyzer_cli/analyzer.py b/src/log_analyzer_cli/analyzer.py index cac046a..c4a4e4a 100644 --- a/src/log_analyzer_cli/analyzer.py +++ b/src/log_analyzer_cli/analyzer.py @@ -74,6 +74,13 @@ def analyze( Returns: Analysis result. """ + # Clear any patterns accumulated by a previous call so re-using the + # same LogAnalyzer across multiple batches does not leak earlier + # groups into the new result. Callers that want to keep history + # across calls should manage their own aggregation instead of + # relying on internal instance state. + self._error_patterns = {} + result = AnalysisResult() result.total_lines = len(entries) result.parsed_entries = len(entries) diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index cd4ccf1..950134e 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -149,6 +149,31 @@ def test_reset(self): analyzer.reset() assert len(analyzer._error_patterns) == 0 + def test_repeated_analyze_does_not_leak_error_groups(self): + analyzer = LogAnalyzer() + first_batch = [ + ParsedEntry( + raw="Error: timeout after 30s", + level="ERROR", + message="timeout after 30s", + ), + ] + second_batch = [ + ParsedEntry( + raw="Error: db connection lost", + level="ERROR", + message="db connection lost", + ), + ] + + first_result = analyzer.analyze(first_batch) + second_result = analyzer.analyze(second_batch) + + assert len(first_result.error_groups) == 1 + assert len(second_result.error_groups) == 1 + assert second_result.error_groups[0].pattern != first_result.error_groups[0].pattern + assert second_result.error_groups[0].count == 1 + class TestAnalyzeLogEntries: """Tests for the analyze_log_entries function."""