HrachShah · HrachShah · Jun 20, 2026 · coderabbitai · Jun 20, 2026
diff --git a/src/log_analyzer_cli/cli.py b/src/log_analyzer_cli/cli.py
@@ -193,36 +193,45 @@ def _parse_file(
 ):
     """Parse log file with optional filtering."""
     entries = []
-    
+
     from log_analyzer_cli.parsers import ParsedEntry
-    from log_analyzer_cli.utils import detect_log_level, parse_timestamp
+    from log_analyzer_cli.utils import parse_timestamp
     import re
-    
+
     compiled_pattern = re.compile(search_pattern) if search_pattern else None
-    
+
     for line in read_log_file(file_path):
         line = line.rstrip("\n\r")
         if not line:
             continue
-
-        if include_levels:
-            level = detect_log_level(line)
-            if level not in include_levels:
-                continue
-
+
         if compiled_pattern and not compiled_pattern.search(line):
             continue
-        
+
         timestamp = parse_timestamp(line)
         if start_time and timestamp and timestamp < start_time:
             continue
         if end_time and timestamp and timestamp > end_time:
             continue
-        
+
         parsed = parser.parse(line)
-        if parsed:
-            entries.append(parsed)
-
+        if not parsed:
+            continue
+
+        # Filter on the parser's level rather than re-scanning the raw
+        # line: the parser already strips host/process names (the syslog
+        # parser detects level from the message portion only), so a
+        # WARNING line whose message happens to contain the word "error"
+        # is reported as WARNING consistently in both the filter check
+        # and the output. The pre-parse filter form scanned the full
+        # line and could let "WARNING: error in user input" slip past
+        # --levels=ERROR, only for the output to then label it WARNING
+        # — the filter and the report disagreed about the same line.
+        if include_levels and parsed.level not in include_levels:
+            continue
+
+        entries.append(parsed)
+
     return entries
 
 

diff --git a/src/log_analyzer_cli/parsers/generic.py b/src/log_analyzer_cli/parsers/generic.py
@@ -41,8 +41,12 @@ def parse(self, line: str) -> Optional[ParsedEntry]:
             return None
 
         timestamp = self._parse_timestamp(match.group("timestamp"))
-        level = detect_log_level(line)
-
+        # Detect the level from the message portion only, i.e. the text
+        # after the end of the regex match (`match.end()`). Scanning the
+        # full line would also scan the matched prefix, so a level-like
+        # word there could misclassify an otherwise clean WARNING line as
+        # ERROR. Text after the match (e.g. a hostname, if the pattern
+        # does not consume it) is still part of the message and scanned.
         message_start = match.end()
         message = line[message_start:].strip()
 
@@ -51,7 +55,7 @@ def parse(self, line: str) -> Optional[ParsedEntry]:
         return ParsedEntry(
             raw=line,
             timestamp=timestamp,
-            level=level,
+            level=detect_log_level(message),
             message=message,
             metadata={"format": "generic"},
         )

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -72,7 +72,27 @@ def test_analyze_no_group(self, runner, json_file):
     def test_analyze_level_filter(self, runner, json_file):
         result = runner.invoke(main, ["analyze", str(json_file), "-l", "ERROR,WARNING"])
         assert result.exit_code == 0
-
+
+    def test_analyze_level_filter_uses_parsed_level_not_raw_line(
+        self, runner, tmp_path
+    ):
+        # JSON log line whose message body mentions "error" while the
+        # structured level field is WARNING. The level filter must use the
+        # parsed level, so no ERROR row may appear in the level distribution.
+        log = tmp_path / "host-error-actual-warn.log"
+        log.write_text(
+            '{"level": "WARNING", "message": "error in the system"}\n'
+        )
+        result = runner.invoke(main, ["analyze", str(log), "-l", "WARNING", "-v"])
+        assert result.exit_code == 0
+        assert "Total Lines:" in result.output
+        assert "WARNING" in result.output
+        # "ERROR" appears as a section header ("TOP ERROR GROUPS") so
+        # check the level-distribution line specifically.
+        import re
+        level_rows = re.findall(r"\bERROR\b\s+:\s+\d+", result.output)
+        assert not level_rows, f"unexpected ERROR level row: {level_rows}"
+
     def test_analyze_pattern_filter(self, runner, json_file):
         result = runner.invoke(main, ["analyze", str(json_file), "-p", "database"])
         assert result.exit_code == 0