HrachShah · HrachShah · May 20, 2026 · Jun 20, 2026 · sourcery-ai · Jun 20, 2026
diff --git a/src/log_analyzer_cli/analyzer.py b/src/log_analyzer_cli/analyzer.py
@@ -74,6 +74,13 @@ def analyze(
         Returns:
             Analysis result.
         """
+        # Clear any patterns accumulated by a previous call so re-using the
+        # same LogAnalyzer across multiple batches does not leak earlier
+        # groups into the new result. Callers that want to keep history
+        # across calls should manage their own aggregation instead of
+        # relying on internal instance state.
+        self._error_patterns = {}
+
         result = AnalysisResult()
         result.total_lines = len(entries)
         result.parsed_entries = len(entries)

diff --git a/src/log_analyzer_cli/parsers/syslog.py b/src/log_analyzer_cli/parsers/syslog.py
@@ -85,7 +85,6 @@ def parse(self, line: str) -> Optional[ParsedEntry]:
                 groups = match.groupdict()
 
                 timestamp = self._parse_timestamp(groups.get("timestamp", ""))
-                level = detect_log_level(line)  # Check full line for level
 
                 metadata = {}
                 if groups.get("host"):
@@ -94,11 +93,12 @@ def parse(self, line: str) -> Optional[ParsedEntry]:
                     metadata["process"] = groups["process"]
                     source = groups["process"]
                 else:
-                    # Use host as source when no process name
                     source = groups.get("host")
                 if groups.get("pid"):
                     metadata["pid"] = groups["pid"]
 
+                level = detect_log_level(groups.get("message", ""))
+
                 return ParsedEntry(
                     raw=line,
                     timestamp=timestamp,

diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py
@@ -149,6 +149,31 @@ def test_reset(self):
         analyzer.reset()
         assert len(analyzer._error_patterns) == 0
 
+    def test_repeated_analyze_does_not_leak_error_groups(self):
+        """A reused LogAnalyzer reports only the current batch's error groups."""
+        analyzer = LogAnalyzer()
+        timeout_entry = ParsedEntry(
+            raw="Error: timeout after 30s",
+            level="ERROR",
+            message="timeout after 30s",
+        )
+        db_entry = ParsedEntry(
+            raw="Error: db connection lost",
+            level="ERROR",
+            message="db connection lost",
+        )
+
+        # Run both batches before asserting, so the first result is inspected
+        # only after the second analyze() call has completed.
+        first_result = analyzer.analyze([timeout_entry])
+        second_result = analyzer.analyze([db_entry])
+
+        assert len(first_result.error_groups) == 1
+        assert len(second_result.error_groups) == 1
+        latest_group = second_result.error_groups[0]
+        assert latest_group.pattern != first_result.error_groups[0].pattern
+        assert latest_group.count == 1
+
 
 class TestAnalyzeLogEntries:
     """Tests for the analyze_log_entries function."""