def reroute_cr_suffix(row: dict[str, str]) -> None:
    """Move a CR-suffixed Debit value to the Credit column.

    AIB CC statements encode credits (payments/refunds) as amounts suffixed
    with 'CR' (e.g. '300.00CR') in a single Amount column, which the template
    aliases to Debit. This function detects the suffix (case-insensitively,
    ignoring surrounding whitespace), strips it, writes the clean value to
    Credit, and clears Debit.

    Rows whose Debit value is missing, not a string, or lacks the suffix are
    left completely untouched.

    Args:
        row: Row dictionary (modified in-place)
    """
    debit = row.get("Debit")
    # A key that is present but holds None (or any non-string) must not crash
    # the pipeline; there is nothing to reroute in that case.
    if not isinstance(debit, str):
        return

    # Extracted cells can carry trailing spaces/newlines; test the suffix on
    # the trimmed text so "300.00CR " is still recognised as a credit.
    amount = debit.strip()
    if amount[-2:].upper() != "CR":
        return

    # Second strip() handles a space between the number and the marker
    # ("300.00 CR").
    row["Credit"] = amount[:-2].strip()
    row["Debit"] = ""
def _reroute_cr_amounts(self, row: dict) -> None:
    """Shift a CR-suffixed Debit amount on this row into the Credit column.

    Thin delegate to ``reroute_cr_suffix``; the row is adjusted in place.

    Args:
        row: Row dictionary to adjust in place.
    """
    reroute_cr_suffix(row)
    return None
class TestCRAmountRerouting:
    """RowPostProcessor.process() handling of CR-suffixed amounts (issue #131)."""

    def _make_cc_processor(self) -> RowPostProcessor:
        """Return a processor backed by a mocked AIB credit-card template."""
        aib_cc_template = Mock(
            document_type="credit_card_statement",
            id="aib_credit_card",
            column_aliases={"Amount": "Debit"},
        )
        return RowPostProcessor(
            columns={
                "Date": (0, 50),
                "Details": (50, 200),
                "Debit": (200, 300),
                "Credit": (300, 400),
            },
            row_classifier=_make_classifier("transaction"),
            template=aib_cc_template,
            filename_date="",
            filename="cc_statement.pdf",
        )

    def test_cr_amount_rerouted_to_credit(self):
        """process() moves '300.00CR' out of Debit into Credit, blanking Debit."""
        row = dict(
            Date="3 Feb",
            Details="PAYMENT THANK YOU",
            Debit="300.00CR",
            Credit="",
        )
        self._make_cc_processor().process(row, "")
        assert (row["Credit"], row["Debit"]) == ("300.00", "")

    def test_plain_debit_unchanged(self):
        """process() leaves an amount without the CR suffix in Debit."""
        row = dict(
            Date="3 Feb",
            Details="TESCO STORE",
            Debit="42.50",
            Credit="",
        )
        self._make_cc_processor().process(row, "")
        assert (row["Debit"], row["Credit"]) == ("42.50", "")