diff --git a/src/log_analyzer_cli/analyzer.py b/src/log_analyzer_cli/analyzer.py index cac046a..c4a4e4a 100644 --- a/src/log_analyzer_cli/analyzer.py +++ b/src/log_analyzer_cli/analyzer.py @@ -74,6 +74,13 @@ def analyze( Returns: Analysis result. """ + # Clear any patterns accumulated by a previous call so re-using the + # same LogAnalyzer across multiple batches does not leak earlier + # groups into the new result. Callers that want to keep history + # across calls should manage their own aggregation instead of + # relying on internal instance state. + self._error_patterns = {} + result = AnalysisResult() result.total_lines = len(entries) result.parsed_entries = len(entries) diff --git a/src/log_analyzer_cli/parsers/syslog.py b/src/log_analyzer_cli/parsers/syslog.py index 99891f3..50f2243 100644 --- a/src/log_analyzer_cli/parsers/syslog.py +++ b/src/log_analyzer_cli/parsers/syslog.py @@ -85,7 +85,6 @@ def parse(self, line: str) -> Optional[ParsedEntry]: groups = match.groupdict() timestamp = self._parse_timestamp(groups.get("timestamp", "")) - level = detect_log_level(line) # Check full line for level metadata = {} if groups.get("host"): @@ -94,11 +93,12 @@ def parse(self, line: str) -> Optional[ParsedEntry]: metadata["process"] = groups["process"] source = groups["process"] else: - # Use host as source when no process name source = groups.get("host") if groups.get("pid"): metadata["pid"] = groups["pid"] + level = detect_log_level(groups.get("message", "")) + return ParsedEntry( raw=line, timestamp=timestamp, diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index cd4ccf1..950134e 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -149,6 +149,31 @@ def test_reset(self): analyzer.reset() assert len(analyzer._error_patterns) == 0 + def test_repeated_analyze_does_not_leak_error_groups(self): + analyzer = LogAnalyzer() + first_batch = [ + ParsedEntry( + raw="Error: timeout after 30s", + level="ERROR", + message="timeout after 30s", + ), + ] + second_batch = [ + ParsedEntry( + raw="Error: db connection lost", + level="ERROR", + message="db connection lost", + ), + ] + + first_result = analyzer.analyze(first_batch) + second_result = analyzer.analyze(second_batch) + + assert len(first_result.error_groups) == 1 + assert len(second_result.error_groups) == 1 + assert second_result.error_groups[0].pattern != first_result.error_groups[0].pattern + assert second_result.error_groups[0].count == 1 + class TestAnalyzeLogEntries: """Tests for the analyze_log_entries function."""