Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/log_analyzer_cli/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ def analyze(
Returns:
Analysis result.
"""
# Clear any patterns accumulated by a previous call so re-using the
# same LogAnalyzer across multiple batches does not leak earlier
# groups into the new result. Callers that want to keep history
# across calls should manage their own aggregation instead of
# relying on internal instance state.
self._error_patterns = {}

result = AnalysisResult()
result.total_lines = len(entries)
result.parsed_entries = len(entries)
Expand Down
4 changes: 2 additions & 2 deletions src/log_analyzer_cli/parsers/syslog.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def parse(self, line: str) -> Optional[ParsedEntry]:
groups = match.groupdict()

timestamp = self._parse_timestamp(groups.get("timestamp", ""))
level = detect_log_level(line) # Check full line for level

metadata = {}
if groups.get("host"):
Expand All @@ -94,11 +93,12 @@ def parse(self, line: str) -> Optional[ParsedEntry]:
metadata["process"] = groups["process"]
source = groups["process"]
else:
# Use host as source when no process name
source = groups.get("host")
if groups.get("pid"):
metadata["pid"] = groups["pid"]

level = detect_log_level(groups.get("message", ""))

return ParsedEntry(
raw=line,
timestamp=timestamp,
Expand Down
2 changes: 2 additions & 0 deletions src/log_analyzer_cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ def _try_parse_datetime(ts_str: str) -> Optional[datetime]:
"%Y-%m-%dT%H:%M:%S.%f",
"%Y-%m-%d %H:%M:%S",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%S.%f%z",
"%Y-%m-%d %H:%M:%S.%f%z",
Comment on lines +44 to +45

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

The space-separated timezone format without fractional seconds is still missing, so valid timestamps are dropped.

Line 22 matches YYYY-MM-DD HH:MM:SS+HH:MM (and ...Z), but _try_parse_datetime still has no %Y-%m-%d %H:%M:%S%z format, so those lines return None and lose timestamps.

Suggested fix
 formats = [
     "%Y-%m-%d %H:%M:%S.%f",
     "%Y-%m-%dT%H:%M:%S.%f",
     "%Y-%m-%d %H:%M:%S",
     "%Y-%m-%dT%H:%M:%S",
     "%Y-%m-%dT%H:%M:%S.%f%z",
     "%Y-%m-%d %H:%M:%S.%f%z",
     "%Y-%m-%dT%H:%M:%S%z",
+    "%Y-%m-%d %H:%M:%S%z",
     "%d/%b/%Y:%H:%M:%S",
     "%b %d %H:%M:%S",
     "%Y/%m/%d %H:%M:%S",
 ]
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
"%Y-%m-%dT%H:%M:%S.%f%z",
"%Y-%m-%d %H:%M:%S.%f%z",
formats = [
"%Y-%m-%d %H:%M:%S.%f",
"%Y-%m-%dT%H:%M:%S.%f",
"%Y-%m-%d %H:%M:%S",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%S.%f%z",
"%Y-%m-%d %H:%M:%S.%f%z",
"%Y-%m-%dT%H:%M:%S%z",
"%Y-%m-%d %H:%M:%S%z",
"%d/%b/%Y:%H:%M:%S",
"%b %d %H:%M:%S",
"%Y/%m/%d %H:%M:%S",
]
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/log_analyzer_cli/utils.py` around lines 44 - 45, the _try_parse_datetime
function is missing a format string for space-separated timestamps that carry a
UTC offset but no fractional seconds. Add "%Y-%m-%d %H:%M:%S%z" to the list of
format strings tried in _try_parse_datetime so that timestamps like
"2024-01-15 10:30:45+00:00" are parsed instead of returning None and being
dropped.

"%Y-%m-%dT%H:%M:%S%z",
"%d/%b/%Y:%H:%M:%S",
"%b %d %H:%M:%S",
Expand Down
25 changes: 25 additions & 0 deletions tests/test_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,31 @@ def test_reset(self):
analyzer.reset()
assert len(analyzer._error_patterns) == 0

def test_repeated_analyze_does_not_leak_error_groups(self):
    """Analyze two single-entry batches with one analyzer; each result has one group.

    The second result must contain exactly one error group, with a count
    of 1 and a pattern different from the group in the first result.
    """
    shared_analyzer = LogAnalyzer()
    timeout_entry = ParsedEntry(
        raw="Error: timeout after 30s",
        level="ERROR",
        message="timeout after 30s",
    )
    db_entry = ParsedEntry(
        raw="Error: db connection lost",
        level="ERROR",
        message="db connection lost",
    )

    # Run both batches through the same instance before inspecting anything.
    outcome_a = shared_analyzer.analyze([timeout_entry])
    outcome_b = shared_analyzer.analyze([db_entry])

    assert len(outcome_a.error_groups) == 1
    assert len(outcome_b.error_groups) == 1

    newest_group = outcome_b.error_groups[0]
    oldest_group = outcome_a.error_groups[0]
    assert newest_group.pattern != oldest_group.pattern
    assert newest_group.count == 1


class TestAnalyzeLogEntries:
"""Tests for the analyze_log_entries function."""
Expand Down
93 changes: 93 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Tests for log-analyzer-cli utility functions."""

from __future__ import annotations

from datetime import datetime, timedelta, timezone

from log_analyzer_cli.utils import (
_try_parse_datetime,
detect_log_level,
parse_timestamp,
)


class TestTryParseDatetime:
    """Checks the timestamp shapes accepted by _try_parse_datetime."""

    def test_iso_with_microseconds_and_offset_colon(self):
        """'T' separator, microseconds, and a +HH:MM offset."""
        # This is the shape produced by datetime.isoformat().
        parsed = _try_parse_datetime("2026-06-21T19:25:00.123456+02:00")
        plus_two = timezone(timedelta(hours=2))
        expected = datetime(2026, 6, 21, 19, 25, 0, 123456, tzinfo=plus_two)
        assert parsed == expected

    def test_iso_with_microseconds_and_offset_no_colon(self):
        """'T' separator, microseconds, and a +HHMM offset."""
        # ISO 8601 also permits the offset without a colon.
        parsed = _try_parse_datetime("2026-06-21T19:25:00.123456+0200")
        plus_two = timezone(timedelta(hours=2))
        expected = datetime(2026, 6, 21, 19, 25, 0, 123456, tzinfo=plus_two)
        assert parsed == expected

    def test_space_separator_with_microseconds_and_offset(self):
        """Space separator, microseconds, and a +HH:MM offset."""
        # Same combined form as above, with a space instead of 'T'.
        parsed = _try_parse_datetime("2026-06-21 19:25:00.123456+02:00")
        plus_two = timezone(timedelta(hours=2))
        expected = datetime(2026, 6, 21, 19, 25, 0, 123456, tzinfo=plus_two)
        assert parsed == expected

    def test_iso_microseconds_with_z_suffix(self):
        """Fractional seconds followed by a 'Z' suffix parse as UTC."""
        # Guards the Z-suffix handling for inputs with fractional seconds.
        parsed = _try_parse_datetime("2026-06-21T19:25:00.123Z")
        expected = datetime(2026, 6, 21, 19, 25, 0, 123000, tzinfo=timezone.utc)
        assert parsed == expected

    def test_iso_microseconds_no_timezone(self):
        """Microseconds without any timezone give a naive datetime."""
        # Regression guard for the earlier microsecond format.
        parsed = _try_parse_datetime("2026-06-21T19:25:00.123456")
        expected = datetime(2026, 6, 21, 19, 25, 0, 123456)
        assert parsed == expected

    def test_iso_no_microseconds_with_offset(self):
        """Whole seconds with a +HH:MM offset."""
        # Regression guard for the earlier offset format.
        parsed = _try_parse_datetime("2026-06-21T19:25:00+02:00")
        plus_two = timezone(timedelta(hours=2))
        expected = datetime(2026, 6, 21, 19, 25, 0, tzinfo=plus_two)
        assert parsed == expected

    def test_iso_no_microseconds_no_timezone(self):
        """Whole seconds, no timezone: the plain ISO form."""
        # Regression guard for the earlier plain format.
        parsed = _try_parse_datetime("2026-06-21T19:25:00")
        expected = datetime(2026, 6, 21, 19, 25, 0)
        assert parsed == expected


class TestParseTimestampIntegration:
    """Checks parse_timestamp on whole log lines rather than bare timestamps."""

    def test_combined_form_extracts_full_datetime(self):
        """A line starting with microseconds plus offset yields an aware datetime."""
        # The whole ISO timestamp, including microseconds and offset,
        # must survive the trip through parse_timestamp.
        log_line = "2026-06-21T19:25:00.123456+02:00 ERROR something failed"
        extracted = parse_timestamp(log_line)
        plus_two = timezone(timedelta(hours=2))
        expected = datetime(2026, 6, 21, 19, 25, 0, 123456, tzinfo=plus_two)
        assert extracted == expected

    def test_z_fractional_extracts_full_datetime(self):
        """A line starting with fractional seconds and 'Z' yields a UTC datetime."""
        log_line = "2026-06-21T19:25:00.123Z ERROR something failed"
        extracted = parse_timestamp(log_line)
        expected = datetime(2026, 6, 21, 19, 25, 0, 123000, tzinfo=timezone.utc)
        assert extracted == expected

    def test_plain_form_still_parses(self):
        """A line starting with a plain space-separated timestamp stays naive."""
        # Regression guard.
        extracted = parse_timestamp("2026-06-21 19:25:00 INFO ok")
        expected = datetime(2026, 6, 21, 19, 25, 0)
        assert extracted == expected


class TestDetectLogLevel:
    """Checks for detect_log_level."""

    def test_returns_uppercase_level(self):
        """A lowercase 'error' in the line is reported as "ERROR"."""
        detected = detect_log_level("2025-01-01 error: bad")
        assert detected == "ERROR"

    def test_returns_unknown_for_plain_text(self):
        """A line with no level keyword is reported as "UNKNOWN"."""
        detected = detect_log_level("just a normal line")
        assert detected == "UNKNOWN"
Loading