HrachShah · HrachShah · Jun 22, 2026 · coderabbitai · Jun 22, 2026
diff --git a/src/log_analyzer_cli/cli.py b/src/log_analyzer_cli/cli.py
@@ -5,6 +5,7 @@
 import sys
 from pathlib import Path
 from typing import Optional
+from datetime import timezone
 
 import click
 
@@ -104,6 +105,7 @@ def analyze(
             except ValueError:
                 click.echo(f"Error: Invalid start-time format. Use YYYY-MM-DD HH:MM:SS", err=True)
                 sys.exit(1)
+            start_dt = start_dt.replace(tzinfo=timezone.utc)
 
         end_dt = None
         if end_time:
@@ -112,6 +114,7 @@ def analyze(
             except ValueError:
                 click.echo(f"Error: Invalid end-time format. Use YYYY-MM-DD HH:MM:SS", err=True)
                 sys.exit(1)
+            end_dt = end_dt.replace(tzinfo=timezone.utc)
 
         parser = _get_parser(log_format, file)
 
@@ -191,38 +194,48 @@ def _parse_file(
     start_time: Optional[datetime] = None,
     end_time: Optional[datetime] = None,
 ):
-    """Parse log file with optional filtering."""
+    """Parse log file with optional filtering.
+
+    Level filtering and time filtering both operate on the *parsed* entry
+    rather than the raw line. Filtering on the raw line's text (via
+    ``detect_log_level``) can disagree with the level the parser actually
+    assigns — e.g. an Apache 200 response with the word "ERROR" in the
+    User-Agent string is ``INFO`` per the parser (200 status) but
+    ``ERROR`` per the raw-text scan, while an Apache 500 response with
+    no level keyword in the line is ``ERROR`` per the parser but
+    ``UNKNOWN`` per the raw-text scan. Parsing first means the level and
+    timestamp filter against the same values the analyzer will use.
+    """
     entries = []
-    
+
     from log_analyzer_cli.parsers import ParsedEntry
     from log_analyzer_cli.utils import detect_log_level, parse_timestamp
     import re
-    
+
     compiled_pattern = re.compile(search_pattern) if search_pattern else None
-    
+
     for line in read_log_file(file_path):
         line = line.rstrip("\n\r")
         if not line:
             continue
-
-        if include_levels:
-            level = detect_log_level(line)
-            if level not in include_levels:
-                continue
-
+
         if compiled_pattern and not compiled_pattern.search(line):
             continue
-        
-        timestamp = parse_timestamp(line)
-        if start_time and timestamp and timestamp < start_time:
+
+        parsed = parser.parse(line)
+        if not parsed:
             continue
-        if end_time and timestamp and timestamp > end_time:
+
+        if include_levels and parsed.level not in include_levels:
             continue
-
-        parsed = parser.parse(line)
-        if parsed:
-            entries.append(parsed)
-
+
+        if start_time and parsed.timestamp and parsed.timestamp < start_time:
+            continue
+        if end_time and parsed.timestamp and parsed.timestamp > end_time:
+            continue
+
+        entries.append(parsed)
+
     return entries
 
 

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -87,9 +87,59 @@ def test_analyze_time_filter(self, runner, json_file):
     def test_analyze_auto_format_detection(self, runner, json_file):
         result = runner.invoke(main, ["analyze", str(json_file), "--format", "auto"])
         assert result.exit_code == 0
-
+
+    def test_analyze_time_filter_naive_start_with_aware_log(self, runner, tmp_path):
+        """Naive --start-time must compare against TZ-aware log timestamps without raising."""
+        import json as json_mod
+        log = tmp_path / "tz-aware.log"
+        log.write_text(
+            json_mod.dumps({
+                "timestamp": "2025-03-20T10:15:32.123Z",
+                "level": "INFO",
+                "message": "Started",
+            }) + "\n" +
+            json_mod.dumps({
+                "timestamp": "2025-03-20T10:17:00.000Z",
+                "level": "ERROR",
+                "message": "Failed",
+            }) + "\n"
+        )
+        result = runner.invoke(
+            main,
+            ["analyze", str(log), "-f", "json", "-o", "json",
+             "--start-time", "2025-03-20 10:16:00"],
+        )
+        assert result.exit_code == 0, result.output
+        assert '"parsed_entries": 1' in result.output
+        assert "ERROR" in result.output
+        assert "Started" not in result.output
+
+    def test_analyze_level_filter_uses_parser_level_not_raw_text(self, runner, tmp_path):
+        """--levels filters by the parser's level, not the raw-line keyword scan."""
+        log = tmp_path / "apache.log"
+        log.write_text(
+            '127.0.0.1 - - [20/Mar/2025:10:15:32 +0000] '
+            '"GET / HTTP/1.1" 200 1234 "-" "Mozilla/ERROR/5.0"\n'
+            '127.0.0.1 - - [20/Mar/2025:10:15:33 +0000] '
+            '"GET /api HTTP/1.1" 500 5678 "-" "curl/8.0"\n'
+        )
+        result = runner.invoke(
+            main,
+            ["analyze", str(log), "-f", "apache", "-o", "json", "-l", "ERROR"],
+        )
+        assert result.exit_code == 0, result.output
+        assert '"parsed_entries": 1' in result.output
+        assert "ERROR" in result.output
+        result = runner.invoke(
+            main,
+            ["analyze", str(log), "-f", "apache", "-o", "json", "-l", "INFO"],
+        )
+        assert result.exit_code == 0, result.output
+        assert '"parsed_entries": 1' in result.output
+        assert "INFO" in result.output
+
     def test_help(self, runner):
         result = runner.invoke(main, ["--help"])
         assert result.exit_code == 0
         assert "analyze" in result.output
-        assert "formats" in result.output
+        assert "formats" in result.output