From 0b25121341dd0f57c2b474f53a354a14ffb55896 Mon Sep 17 00:00:00 2001
From: longieirl <noreply@github.com>
Date: Wed, 25 Mar 2026 14:49:38 +0000
Subject: [PATCH 1/5] =?UTF-8?q?feat(#28):=20add=20ServiceRegistry=20and=20?=
 =?UTF-8?q?wire=20processor=20=E2=80=94=20enrichment/classify/dedup/sort?=
 =?UTF-8?q?=20pipeline=20now=20centralised=20in=20ServiceRegistry;=20Trans?=
 =?UTF-8?q?actionProcessingOrchestrator=20trimmed=20to=20group=5Fby=5Fiban?=
 =?UTF-8?q?=20only?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/bankstatements_core/processor.py      |  25 +-
 .../services/service_registry.py              | 252 ++++++++++++++++++
 .../transaction_processing_orchestrator.py    | 156 +----------
 3 files changed, 279 insertions(+), 154 deletions(-)
 create mode 100644 packages/parser-core/src/bankstatements_core/services/service_registry.py

diff --git a/packages/parser-core/src/bankstatements_core/processor.py b/packages/parser-core/src/bankstatements_core/processor.py
index 7a3a9b5..5ee3358 100644
--- a/packages/parser-core/src/bankstatements_core/processor.py
+++ b/packages/parser-core/src/bankstatements_core/processor.py
@@ -28,6 +28,7 @@
     TransactionSortingService,
 )
 from bankstatements_core.services.transaction_filter import TransactionFilterService
+from bankstatements_core.services.service_registry import ServiceRegistry
 from bankstatements_core.services.transaction_processing_orchestrator import (
     TransactionProcessingOrchestrator,
 )
@@ -122,6 +123,7 @@ def __init__(
         activity_log: Any | None = None,
         entitlements: Any | None = None,
         template_registry: Any | None = None,
+        registry: ServiceRegistry | None = None,
     ):
         """
         Initialize the bank statement processor.
@@ -250,6 +252,17 @@ def __init__(
             sorting_service=self._sorting_service,
         )
 
+        # ServiceRegistry: single wiring point for transaction processing
+        if registry is not None:
+            self._registry = registry
+        else:
+            self._registry = ServiceRegistry.from_config(
+                config,
+                entitlements=entitlements,
+                duplicate_detector=self._duplicate_service,
+                sorting_service=self._sorting_service,
+            )
+
         self._output_orchestrator = OutputOrchestrator(
             output_dir=self.output_dir,
             output_strategies=self.output_strategies,
@@ -322,8 +335,8 @@ def run(self) -> dict:
                 pdf_ibans[extraction.source_file.name] = extraction.iban
             all_rows.extend(transactions_to_dicts(extraction.transactions))
 
-        # Step 2: Group transactions by IBAN (delegated to orchestrator)
-        rows_by_iban = self._transaction_orchestrator.group_by_iban(all_rows, pdf_ibans)
+        # Step 2: Group transactions by IBAN (delegated to registry)
+        rows_by_iban = self._registry.group_by_iban(all_rows, pdf_ibans)
         logger.debug(
             f"Grouped {len(all_rows)} transactions into {len(rows_by_iban)} IBAN groups"
         )
@@ -402,11 +415,9 @@ def _process_transaction_group(
                     f"Using template '{template_id}' for transaction type classification"
                 )
 
-        # Detect duplicates and sort (delegated to orchestrator)
-        unique_rows, duplicate_rows = (
-            self._transaction_orchestrator.process_transaction_group(
-                iban_rows, template=template
-            )
+        # Enrich, classify, detect duplicates and sort (delegated to registry)
+        unique_rows, duplicate_rows = self._registry.process_transaction_group(
+            iban_rows, template=template
         )
 
         # Filter duplicates to remove any empty rows and header rows
diff --git a/packages/parser-core/src/bankstatements_core/services/service_registry.py b/packages/parser-core/src/bankstatements_core/services/service_registry.py
new file mode 100644
index 0000000..79cf9f3
--- /dev/null
+++ b/packages/parser-core/src/bankstatements_core/services/service_registry.py
@@ -0,0 +1,252 @@
+"""ServiceRegistry — single wiring point for transaction processing services.
+
+Centralises construction of duplicate detection, sorting, IBAN grouping, and
+the enrichment/classification pipeline that was previously spread across
+TransactionProcessingOrchestrator and BankStatementProcessor.
+
+Usage (primary path)::
+
+    registry = ServiceRegistry.from_config(processor_config, entitlements)
+    unique, dupes = registry.process_transaction_group(rows, template)
+    grouped = registry.group_by_iban(rows, pdf_ibans)
+
+Escape hatches are available for callers that need individual services::
+
+    detector = registry.get_duplicate_detector()
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from bankstatements_core.config.processor_config import ProcessorConfig
+    from bankstatements_core.domain.protocols.services import (
+        IDuplicateDetector,
+        IIBANGrouping,
+        ITransactionSorting,
+    )
+    from bankstatements_core.entitlements import Entitlements
+    from bankstatements_core.templates.template_model import BankTemplate
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class _ServiceContext:
+    """Shared dependencies passed once to ServiceRegistry at construction time.
+
+    This is an internal dataclass — never exposed to callers.
+    """
+
+    column_names: list[str]
+    debit_columns: list[str]
+    credit_columns: list[str]
+    entitlements: Any  # Entitlements | None
+
+
+class ServiceRegistry:
+    """Single wiring point for all transaction processing services.
+
+    Callers use the primary methods for the common case.
+    Individual services are accessible via get_*() escape hatches for tests
+    or specialised callers.
+    """
+
+    def __init__(
+        self,
+        context: _ServiceContext,
+        duplicate_detector: "IDuplicateDetector",
+        sorting_service: "ITransactionSorting",
+        grouping_service: "IIBANGrouping",
+    ) -> None:
+        self._context = context
+        self._duplicate_detector = duplicate_detector
+        self._sorting_service = sorting_service
+        self._grouping_service = grouping_service
+
+    # ------------------------------------------------------------------
+    # Factory
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def from_config(
+        cls,
+        config: "ProcessorConfig",
+        entitlements: "Entitlements | None" = None,
+        duplicate_detector: "IDuplicateDetector | None" = None,
+        sorting_service: "ITransactionSorting | None" = None,
+        grouping_service: "IIBANGrouping | None" = None,
+    ) -> "ServiceRegistry":
+        """Build a ServiceRegistry from a ProcessorConfig.
+
+        Args:
+            config: Processor configuration carrying column, sorting, and
+                processing settings.
+            entitlements: Optional tier-based entitlements.
+            duplicate_detector: Override duplicate detector (default: AllFields).
+            sorting_service: Override sorting service (default: chronological
+                if config.processing.sort_by_date, else no-sort).
+            grouping_service: Override IBAN grouping service (default: suffix-4).
+
+        Returns:
+            Fully wired ServiceRegistry instance.
+        """
+        from bankstatements_core.config.column_config import get_column_names
+        from bankstatements_core.patterns.strategies import (
+            AllFieldsDuplicateStrategy,
+            ChronologicalSortingStrategy,
+            NoSortingStrategy,
+        )
+        from bankstatements_core.processor import find_matching_columns
+        from bankstatements_core.services.duplicate_detector import (
+            DuplicateDetectionService,
+        )
+        from bankstatements_core.services.iban_grouping import IBANGroupingService
+        from bankstatements_core.services.sorting_service import (
+            TransactionSortingService,
+        )
+
+        column_names = get_column_names(
+            config.extraction.columns
+        ) if config.extraction.columns else []
+        debit_columns = find_matching_columns(column_names, ["debit"])
+        credit_columns = find_matching_columns(column_names, ["credit"])
+
+        context = _ServiceContext(
+            column_names=column_names,
+            debit_columns=debit_columns,
+            credit_columns=credit_columns,
+            entitlements=entitlements,
+        )
+
+        if duplicate_detector is None:
+            duplicate_detector = DuplicateDetectionService(AllFieldsDuplicateStrategy())
+
+        if sorting_service is None:
+            sort_strategy = (
+                ChronologicalSortingStrategy()
+                if config.processing.sort_by_date
+                else NoSortingStrategy()
+            )
+            sorting_service = TransactionSortingService(sort_strategy)
+
+        if grouping_service is None:
+            grouping_service = IBANGroupingService()
+
+        return cls(context, duplicate_detector, sorting_service, grouping_service)
+
+    # ------------------------------------------------------------------
+    # Primary methods (80 % case)
+    # ------------------------------------------------------------------
+
+    def process_transaction_group(
+        self,
+        transactions: list[dict],
+        template: "BankTemplate | None" = None,
+    ) -> tuple[list[dict], list[dict]]:
+        """Enrich → classify → deduplicate → sort a group of transactions.
+
+        This replaces the explicit five-call chain that was previously spread
+        across BankStatementProcessor and TransactionProcessingOrchestrator.
+
+        Args:
+            transactions: List of transaction dicts for a single IBAN group.
+            template: Optional bank template used for transaction type keywords.
+
+        Returns:
+            Tuple of (unique_transactions, duplicate_transactions).
+        """
+        enriched = self._enrich_with_filename(transactions)
+        enriched = self._enrich_with_document_type(enriched)
+        enriched = self._classify_transaction_types(enriched, template)
+
+        unique_rows, duplicate_rows = self._duplicate_detector.detect_and_separate(
+            enriched
+        )
+        logger.info(
+            "Duplicate detection: %d unique, %d duplicates",
+            len(unique_rows),
+            len(duplicate_rows),
+        )
+
+        sorted_rows = self._sorting_service.sort(unique_rows)
+        return sorted_rows, duplicate_rows
+
+    def group_by_iban(
+        self,
+        transactions: list[dict],
+        pdf_ibans: dict[str, str],
+    ) -> dict[str, list[dict]]:
+        """Group transactions by IBAN suffix.
+
+        Args:
+            transactions: Flat list of all transaction dicts.
+            pdf_ibans: Mapping of PDF filename → IBAN string.
+
+        Returns:
+            Dict of IBAN suffix → list of transaction dicts.
+        """
+        return self._grouping_service.group_by_iban(transactions, pdf_ibans)
+
+    # ------------------------------------------------------------------
+    # Escape hatches (20 % case)
+    # ------------------------------------------------------------------
+
+    def get_duplicate_detector(self) -> "IDuplicateDetector":
+        return self._duplicate_detector
+
+    def get_sorting_service(self) -> "ITransactionSorting":
+        return self._sorting_service
+
+    def get_grouping_service(self) -> "IIBANGrouping":
+        return self._grouping_service
+
+    # ------------------------------------------------------------------
+    # Internal enrichment helpers (inlined from TransactionProcessingOrchestrator)
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _enrich_with_filename(transactions: list[dict]) -> list[dict]:
+        """Set Filename key from source_pdf if not already present."""
+        for row in transactions:
+            if "Filename" not in row:
+                row["Filename"] = row.get("source_pdf", "")
+        return transactions
+
+    @staticmethod
+    def _enrich_with_document_type(
+        transactions: list[dict], default_type: str = "bank_statement"
+    ) -> list[dict]:
+        """Set document_type if not already present."""
+        for row in transactions:
+            if "document_type" not in row:
+                row["document_type"] = default_type
+        return transactions
+
+    @staticmethod
+    def _classify_transaction_types(
+        transactions: list[dict],
+        template: "BankTemplate | None" = None,
+    ) -> list[dict]:
+        """Classify each transaction using Chain of Responsibility."""
+        from bankstatements_core.services.transaction_type_classifier import (
+            create_transaction_type_classifier_chain,
+        )
+
+        if not transactions:
+            return transactions
+
+        document_type = transactions[0].get("document_type")
+        classifier = create_transaction_type_classifier_chain(document_type)
+
+        for transaction in transactions:
+            transaction["transaction_type"] = classifier.classify(transaction, template)
+
+        logger.info(
+            "Transaction type classification: %d transactions classified",
+            len(transactions),
+        )
+        return transactions
diff --git a/packages/parser-core/src/bankstatements_core/services/transaction_processing_orchestrator.py b/packages/parser-core/src/bankstatements_core/services/transaction_processing_orchestrator.py
index a5c2ed1..077bb25 100644
--- a/packages/parser-core/src/bankstatements_core/services/transaction_processing_orchestrator.py
+++ b/packages/parser-core/src/bankstatements_core/services/transaction_processing_orchestrator.py
@@ -1,10 +1,10 @@
 """Transaction Processing Orchestrator for bank statements.
 
-This module orchestrates transaction-level processing including:
-- IBAN grouping
-- Duplicate detection
-- Sorting
-- Filename enrichment
+This module orchestrates IBAN grouping. Enrichment, classification, duplicate
+detection and sorting have moved to ServiceRegistry.
+
+Note: This class is retained for backward compatibility. A follow-up issue will
+track its complete removal once all callers migrate to ServiceRegistry.
 """
 
 from __future__ import annotations
@@ -18,19 +18,16 @@
         IIBANGrouping,
         ITransactionSorting,
     )
-    from bankstatements_core.templates.template_model import BankTemplate
 
 logger = logging.getLogger(__name__)
 
 
 class TransactionProcessingOrchestrator:
-    """Orchestrates transaction processing pipeline.
+    """Orchestrates IBAN grouping for transaction processing.
 
-    Handles:
-    - Grouping transactions by IBAN
-    - Duplicate detection and removal
-    - Transaction sorting (chronological or none)
-    - Enrichment with source filename
+    Note: enrichment, classification, duplicate detection and sorting have
+    moved to ServiceRegistry. This class is retained for backward
+    compatibility and will be removed in a follow-up.
     """
 
     def __init__(
@@ -63,140 +60,5 @@ def group_by_iban(
 
         Returns:
             Dictionary mapping IBANs to their transactions
-
-        Examples:
-            >>> orchestrator = TransactionProcessingOrchestrator(detector, sorter)
-            >>> grouped = orchestrator.group_by_iban(transactions, ibans)
-            >>> for iban, txns in grouped.items():
-            ...     print(f"IBAN {iban}: {len(txns)} transactions")
         """
         return self.grouping_service.group_by_iban(transactions, pdf_ibans)
-
-    def process_transaction_group(
-        self, transactions: list[dict], template: "BankTemplate | None" = None
-    ) -> tuple[list[dict], list[dict]]:
-        """Process a group of transactions (detect duplicates, sort, enrich).
-
-        Args:
-            transactions: List of transaction dictionaries
-            template: Optional bank template with transaction type keywords
-
-        Returns:
-            Tuple of (unique_transactions, duplicate_transactions)
-
-        Examples:
-            >>> orchestrator = TransactionProcessingOrchestrator(detector, sorter)
-            >>> unique, dupes = orchestrator.process_transaction_group(transactions)
-            >>> total = len(unique) + len(dupes)
-            >>> print(f"Found {len(dupes)} duplicates in {total} transactions")
-        """
-        # 0. Enrich with metadata (filename, document_type, transaction_type)
-        enriched = self.enrich_with_filename(transactions)
-        enriched = self.enrich_with_document_type(enriched)
-        enriched = self.classify_transaction_types(enriched, template)
-
-        # 1. Detect duplicates (now with transaction_type available)
-        unique_rows, duplicate_rows = self.duplicate_detector.detect_and_separate(
-            enriched
-        )
-
-        logger.info(
-            "Duplicate detection: %d unique, %d duplicates",
-            len(unique_rows),
-            len(duplicate_rows),
-        )
-
-        # 2. Sort transactions if configured
-        sorted_rows = self.sorting_service.sort(unique_rows)
-
-        return sorted_rows, duplicate_rows
-
-    def enrich_with_filename(self, transactions: list[dict]) -> list[dict]:
-        """Add 'Filename' column to transactions if not present.
-
-        Args:
-            transactions: List of transaction dictionaries
-
-        Returns:
-            Transactions with 'Filename' column added
-
-        Examples:
-            >>> orchestrator = TransactionProcessingOrchestrator(detector, sorter)
-            >>> enriched = orchestrator.enrich_with_filename(transactions)
-            >>> all('Filename' in txn for txn in enriched)
-            True
-        """
-        for row in transactions:
-            if "Filename" not in row:
-                row["Filename"] = row.get("source_pdf", "")
-
-        return transactions
-
-    def enrich_with_document_type(
-        self, transactions: list[dict], default_type: str = "bank_statement"
-    ) -> list[dict]:
-        """Add 'document_type' column to transactions if not present.
-
-        Args:
-            transactions: List of transaction dictionaries
-            default_type: Default document type if missing (default: "bank_statement")
-
-        Returns:
-            Transactions with document_type field
-
-        Examples:
-            >>> orchestrator = TransactionProcessingOrchestrator(detector, sorter)
-            >>> enriched = orchestrator.enrich_with_document_type(transactions)
-            >>> all('document_type' in txn for txn in enriched)
-            True
-        """
-        for row in transactions:
-            if "document_type" not in row:
-                row["document_type"] = default_type
-
-        return transactions
-
-    def classify_transaction_types(
-        self, transactions: list[dict], template: "BankTemplate | None" = None
-    ) -> list[dict]:
-        """Classify transaction type for each transaction.
-
-        Uses Chain of Responsibility pattern to apply multiple classification
-        strategies in sequence until one succeeds.
-
-        Args:
-            transactions: List of transaction dictionaries
-            template: Optional bank template with transaction type keywords
-
-        Returns:
-            Transactions with transaction_type field added
-
-        Examples:
-            >>> orchestrator = TransactionProcessingOrchestrator(detector, sorter)
-            >>> enriched = orchestrator.classify_transaction_types(transactions, template)
-            >>> all('transaction_type' in txn for txn in enriched)
-            True
-        """
-        from bankstatements_core.services.transaction_type_classifier import (
-            create_transaction_type_classifier_chain,
-        )
-
-        if not transactions:
-            return transactions
-
-        # Get document type from first transaction (all in group have same type)
-        document_type = transactions[0].get("document_type")
-
-        # Create classifier chain appropriate for document type
-        classifier = create_transaction_type_classifier_chain(document_type)
-
-        # Classify each transaction
-        for transaction in transactions:
-            transaction["transaction_type"] = classifier.classify(transaction, template)
-
-        logger.info(
-            "Transaction type classification: %d transactions classified",
-            len(transactions),
-        )
-
-        return transactions

From eba90746e0cbc3e3f45db16ab583f0ab729ef6cf Mon Sep 17 00:00:00 2001
From: longieirl <noreply@github.com>
Date: Wed, 25 Mar 2026 14:58:19 +0000
Subject: [PATCH 2/5] feat(#28): wire ServiceRegistry through factory and
 builder; add boundary tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ProcessorFactory.create_custom and BankStatementProcessorBuilder.build() now
  construct ServiceRegistry.from_config() and inject it via registry= kwarg,
  so on these paths the processor no longer builds its own registry (it still
  falls back to building one internally when registry is None)
- Fix wrong import of ChronologicalSortingStrategy and NoSortingStrategy
  (patterns.strategies → services.sorting_service) in service_registry.py
- Add tests/services/test_service_registry.py with boundary tests covering
  from_config wiring, process_transaction_group pipeline, group_by_iban
  delegation, and escape hatches
- Trim test_transaction_processing_orchestrator.py to group_by_iban only;
  removed tests for methods that moved to ServiceRegistry
---
 .../builders/processor_builder.py             |   5 +
 .../bankstatements_core/patterns/factories.py |   5 +
 .../services/service_registry.py              |  12 +-
 .../tests/services/test_service_registry.py   | 150 ++++++++++
 ...est_transaction_processing_orchestrator.py | 257 +-----------------
 5 files changed, 171 insertions(+), 258 deletions(-)
 create mode 100644 packages/parser-core/tests/services/test_service_registry.py

diff --git a/packages/parser-core/src/bankstatements_core/builders/processor_builder.py b/packages/parser-core/src/bankstatements_core/builders/processor_builder.py
index 70b3cc2..d5b90b2 100644
--- a/packages/parser-core/src/bankstatements_core/builders/processor_builder.py
+++ b/packages/parser-core/src/bankstatements_core/builders/processor_builder.py
@@ -342,6 +342,10 @@ def build(self) -> "BankStatementProcessor":
             config.extraction.enable_dynamic_boundary,
         )
 
+        from bankstatements_core.services.service_registry import ServiceRegistry
+
+        registry = ServiceRegistry.from_config(config, entitlements=self._entitlements)
+
         return BankStatementProcessor(
             config=config,
             output_strategies=self._output_strategies,
@@ -349,4 +353,5 @@ def build(self) -> "BankStatementProcessor":
             repository=self._repository,
             activity_log=self._activity_log,
             entitlements=self._entitlements,
+            registry=registry,
         )
diff --git a/packages/parser-core/src/bankstatements_core/patterns/factories.py b/packages/parser-core/src/bankstatements_core/patterns/factories.py
index a556a8f..b260b23 100644
--- a/packages/parser-core/src/bankstatements_core/patterns/factories.py
+++ b/packages/parser-core/src/bankstatements_core/patterns/factories.py
@@ -210,11 +210,16 @@ def create_custom(
             ),
         )
 
+        from bankstatements_core.services.service_registry import ServiceRegistry
+
+        registry = ServiceRegistry.from_config(config, entitlements=entitlements)
+
         processor = BankStatementProcessor(
             config=config,
             output_strategies=output_strategies,
             duplicate_strategy=duplicate_strategy,
             entitlements=entitlements,
+            registry=registry,
         )
 
         return processor
diff --git a/packages/parser-core/src/bankstatements_core/services/service_registry.py b/packages/parser-core/src/bankstatements_core/services/service_registry.py
index 79cf9f3..54d179d 100644
--- a/packages/parser-core/src/bankstatements_core/services/service_registry.py
+++ b/packages/parser-core/src/bankstatements_core/services/service_registry.py
@@ -97,8 +97,6 @@ def from_config(
         from bankstatements_core.config.column_config import get_column_names
         from bankstatements_core.patterns.strategies import (
             AllFieldsDuplicateStrategy,
-            ChronologicalSortingStrategy,
-            NoSortingStrategy,
         )
         from bankstatements_core.processor import find_matching_columns
         from bankstatements_core.services.duplicate_detector import (
@@ -106,12 +104,16 @@ def from_config(
         )
         from bankstatements_core.services.iban_grouping import IBANGroupingService
         from bankstatements_core.services.sorting_service import (
+            ChronologicalSortingStrategy,
+            NoSortingStrategy,
             TransactionSortingService,
         )
 
-        column_names = get_column_names(
-            config.extraction.columns
-        ) if config.extraction.columns else []
+        column_names = (
+            get_column_names(config.extraction.columns)
+            if config.extraction.columns
+            else []
+        )
         debit_columns = find_matching_columns(column_names, ["debit"])
         credit_columns = find_matching_columns(column_names, ["credit"])
 
diff --git a/packages/parser-core/tests/services/test_service_registry.py b/packages/parser-core/tests/services/test_service_registry.py
new file mode 100644
index 0000000..6366167
--- /dev/null
+++ b/packages/parser-core/tests/services/test_service_registry.py
@@ -0,0 +1,150 @@
+"""Boundary tests for ServiceRegistry.
+
+Covers:
+- from_config builds a fully wired registry
+- process_transaction_group runs enrich → classify → dedup → sort
+- group_by_iban delegates to grouping service
+- escape hatches return the injected services
+"""
+
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+from unittest.mock import Mock
+
+import pytest
+
+from bankstatements_core.config.processor_config import (
+    ExtractionConfig,
+    OutputConfig,
+    ProcessingConfig,
+    ProcessorConfig,
+)
+from bankstatements_core.services.duplicate_detector import DuplicateDetectionService
+from bankstatements_core.services.iban_grouping import IBANGroupingService
+from bankstatements_core.services.service_registry import ServiceRegistry
+from bankstatements_core.services.sorting_service import TransactionSortingService
+
+
+def _minimal_config(sort_by_date: bool = True) -> ProcessorConfig:
+    tmp = Path(tempfile.mkdtemp())
+    return ProcessorConfig(
+        input_dir=tmp,
+        output_dir=tmp,
+        extraction=ExtractionConfig(),
+        processing=ProcessingConfig(sort_by_date=sort_by_date),
+        output=OutputConfig(),
+    )
+
+
+class TestFromConfig:
+    def test_builds_registry_with_default_services(self):
+        config = _minimal_config()
+        registry = ServiceRegistry.from_config(config)
+        assert isinstance(registry.get_duplicate_detector(), DuplicateDetectionService)
+        assert isinstance(registry.get_sorting_service(), TransactionSortingService)
+        assert isinstance(registry.get_grouping_service(), IBANGroupingService)
+
+    def test_injected_services_override_defaults(self):
+        config = _minimal_config()
+        mock_dedup = Mock()
+        mock_sort = Mock()
+        mock_group = Mock()
+        registry = ServiceRegistry.from_config(
+            config,
+            duplicate_detector=mock_dedup,
+            sorting_service=mock_sort,
+            grouping_service=mock_group,
+        )
+        assert registry.get_duplicate_detector() is mock_dedup
+        assert registry.get_sorting_service() is mock_sort
+        assert registry.get_grouping_service() is mock_group
+
+
+class TestProcessTransactionGroup:
+    def test_enriches_classifies_deduplicates_and_sorts(self):
+        config = _minimal_config()
+        transactions = [
+            {"Date": "01/01/2024", "Details": "Test", "source_pdf": "a.pdf"},
+        ]
+
+        mock_dedup = Mock()
+        mock_dedup.detect_and_separate.return_value = (transactions, [])
+        mock_sort = Mock()
+        mock_sort.sort.side_effect = lambda x: x
+
+        registry = ServiceRegistry.from_config(
+            config,
+            duplicate_detector=mock_dedup,
+            sorting_service=mock_sort,
+        )
+
+        unique, dupes = registry.process_transaction_group(transactions)
+
+        # Enrichment happened before dedup
+        called_with = mock_dedup.detect_and_separate.call_args[0][0]
+        assert called_with[0]["Filename"] == "a.pdf"
+        assert called_with[0]["document_type"] == "bank_statement"
+        assert "transaction_type" in called_with[0]
+
+        # Sort was called and result returned
+        mock_sort.sort.assert_called_once()
+        assert unique == transactions
+        assert dupes == []
+
+    def test_returns_unique_and_duplicate_lists(self):
+        config = _minimal_config()
+        tx1 = {"Date": "01/01/2024", "Details": "A", "source_pdf": "x.pdf"}
+        tx2 = {"Date": "01/01/2024", "Details": "A", "source_pdf": "x.pdf"}
+
+        mock_dedup = Mock()
+        mock_dedup.detect_and_separate.return_value = ([tx1], [tx2])
+        mock_sort = Mock()
+        mock_sort.sort.side_effect = lambda x: x
+
+        registry = ServiceRegistry.from_config(
+            config,
+            duplicate_detector=mock_dedup,
+            sorting_service=mock_sort,
+        )
+
+        unique, dupes = registry.process_transaction_group([tx1, tx2])
+        assert len(unique) == 1
+        assert len(dupes) == 1
+
+
+class TestGroupByIban:
+    def test_delegates_to_grouping_service(self):
+        config = _minimal_config()
+        mock_group = Mock()
+        mock_group.group_by_iban.return_value = {"1234": []}
+
+        registry = ServiceRegistry.from_config(config, grouping_service=mock_group)
+        transactions = [{"Date": "01/01/2024"}]
+        pdf_ibans = {"a.pdf": "IE001234"}
+
+        result = registry.group_by_iban(transactions, pdf_ibans)
+
+        mock_group.group_by_iban.assert_called_once_with(transactions, pdf_ibans)
+        assert result == {"1234": []}
+
+
+class TestEscapeHatches:
+    def test_get_duplicate_detector_returns_injected(self):
+        config = _minimal_config()
+        mock_dedup = Mock()
+        registry = ServiceRegistry.from_config(config, duplicate_detector=mock_dedup)
+        assert registry.get_duplicate_detector() is mock_dedup
+
+    def test_get_sorting_service_returns_injected(self):
+        config = _minimal_config()
+        mock_sort = Mock()
+        registry = ServiceRegistry.from_config(config, sorting_service=mock_sort)
+        assert registry.get_sorting_service() is mock_sort
+
+    def test_get_grouping_service_returns_injected(self):
+        config = _minimal_config()
+        mock_group = Mock()
+        registry = ServiceRegistry.from_config(config, grouping_service=mock_group)
+        assert registry.get_grouping_service() is mock_group
diff --git a/packages/parser-core/tests/services/test_transaction_processing_orchestrator.py b/packages/parser-core/tests/services/test_transaction_processing_orchestrator.py
index 6d3e233..30f8f6a 100644
--- a/packages/parser-core/tests/services/test_transaction_processing_orchestrator.py
+++ b/packages/parser-core/tests/services/test_transaction_processing_orchestrator.py
@@ -1,7 +1,7 @@
 """Tests for TransactionProcessingOrchestrator.
 
-This module tests the transaction processing orchestration including
-duplicate detection, sorting, and metadata enrichment.
+The orchestrator now only handles IBAN grouping. Enrichment, classification,
+duplicate detection and sorting are tested via test_service_registry.py.
 """
 
 from __future__ import annotations
@@ -17,244 +17,34 @@
 
 @pytest.fixture
 def mock_duplicate_detector():
-    """Create a mock duplicate detector."""
     detector = Mock()
-    detector.detect_and_separate.return_value = ([], [])  # (unique, duplicates)
+    detector.detect_and_separate.return_value = ([], [])
     return detector
 
 
 @pytest.fixture
 def mock_sorting_service():
-    """Create a mock sorting service."""
     sorter = Mock()
-    sorter.sort.side_effect = lambda x: x  # Pass through
+    sorter.sort.side_effect = lambda x: x
     return sorter
 
 
 @pytest.fixture
 def orchestrator(mock_duplicate_detector, mock_sorting_service):
-    """Create a TransactionProcessingOrchestrator instance."""
     return TransactionProcessingOrchestrator(
         duplicate_detector=mock_duplicate_detector,
         sorting_service=mock_sorting_service,
     )
 
 
-class TestEnrichWithFilename:
-    """Test filename enrichment."""
-
-    def test_adds_filename_when_missing(self, orchestrator):
-        """Test that Filename is added when not present."""
-        transactions = [
-            {"Date": "01/12/2023", "Details": "Test", "source_pdf": "test.pdf"},
-            {"Date": "02/12/2023", "Details": "Test2", "source_pdf": "test2.pdf"},
-        ]
-
-        enriched = orchestrator.enrich_with_filename(transactions)
-
-        assert enriched[0]["Filename"] == "test.pdf"
-        assert enriched[1]["Filename"] == "test2.pdf"
-
-    def test_preserves_existing_filename(self, orchestrator):
-        """Test that existing Filename is preserved."""
-        transactions = [
-            {
-                "Date": "01/12/2023",
-                "Details": "Test",
-                "Filename": "existing.pdf",
-                "source_pdf": "other.pdf",
-            }
-        ]
-
-        enriched = orchestrator.enrich_with_filename(transactions)
-
-        # Should keep existing Filename
-        assert enriched[0]["Filename"] == "existing.pdf"
-
-    def test_handles_missing_source_pdf(self, orchestrator):
-        """Test handling when source_pdf is missing."""
-        transactions = [{"Date": "01/12/2023", "Details": "Test"}]
-
-        enriched = orchestrator.enrich_with_filename(transactions)
-
-        # Should add empty Filename
-        assert enriched[0]["Filename"] == ""
-
-
-class TestEnrichWithDocumentType:
-    """Test document type enrichment."""
-
-    def test_adds_document_type_when_missing(self, orchestrator):
-        """Test that document_type is added when not present."""
-        transactions = [
-            {"Date": "01/12/2023", "Details": "Test1"},
-            {"Date": "02/12/2023", "Details": "Test2"},
-        ]
-
-        enriched = orchestrator.enrich_with_document_type(transactions)
-
-        # Should add default document_type
-        assert enriched[0]["document_type"] == "bank_statement"
-        assert enriched[1]["document_type"] == "bank_statement"
-
-    def test_preserves_existing_document_type(self, orchestrator):
-        """Test that existing document_type is preserved."""
-        transactions = [
-            {
-                "Date": "01/12/2023",
-                "Details": "Card Purchase",
-                "document_type": "credit_card_statement",
-            },
-            {
-                "Date": "02/12/2023",
-                "Details": "Loan Payment",
-                "document_type": "loan_statement",
-            },
-        ]
-
-        enriched = orchestrator.enrich_with_document_type(transactions)
-
-        # Should preserve existing types
-        assert enriched[0]["document_type"] == "credit_card_statement"
-        assert enriched[1]["document_type"] == "loan_statement"
-
-    def test_custom_default_type(self, orchestrator):
-        """Test using custom default document type."""
-        transactions = [{"Date": "01/12/2023", "Details": "Test"}]
-
-        enriched = orchestrator.enrich_with_document_type(
-            transactions, default_type="credit_card_statement"
-        )
-
-        assert enriched[0]["document_type"] == "credit_card_statement"
-
-    def test_mixed_existing_and_missing(self, orchestrator):
-        """Test handling mix of transactions with and without document_type."""
-        transactions = [
-            {
-                "Date": "01/12/2023",
-                "Details": "Has Type",
-                "document_type": "credit_card_statement",
-            },
-            {"Date": "02/12/2023", "Details": "No Type"},
-            {
-                "Date": "03/12/2023",
-                "Details": "Has Type",
-                "document_type": "loan_statement",
-            },
-        ]
-
-        enriched = orchestrator.enrich_with_document_type(transactions)
-
-        # Should preserve existing and add default for missing
-        assert enriched[0]["document_type"] == "credit_card_statement"
-        assert enriched[1]["document_type"] == "bank_statement"  # Default added
-        assert enriched[2]["document_type"] == "loan_statement"
-
-
-class TestProcessTransactionGroup:
-    """Test transaction group processing."""
-
-    def test_enriches_before_duplicate_detection(
-        self, mock_duplicate_detector, mock_sorting_service
-    ):
-        """Test that enrichment happens before duplicate detection."""
-        transactions = [
-            {"Date": "01/12/2023", "Details": "Test", "source_pdf": "test.pdf"}
-        ]
-
-        # Setup mock to capture what's passed to detect_and_separate
-        captured_input = []
-
-        def capture_input(txns):
-            captured_input.extend(txns)
-            return (txns, [])  # Return as unique, no duplicates
-
-        mock_duplicate_detector.detect_and_separate.side_effect = capture_input
-
-        orchestrator = TransactionProcessingOrchestrator(
-            duplicate_detector=mock_duplicate_detector,
-            sorting_service=mock_sorting_service,
-        )
-
-        orchestrator.process_transaction_group(transactions)
-
-        # Verify enrichment happened before duplicate detection
-        assert len(captured_input) > 0
-        assert "Filename" in captured_input[0]
-        assert "document_type" in captured_input[0]
-
-    def test_enrichment_includes_both_fields(
-        self, mock_duplicate_detector, mock_sorting_service
-    ):
-        """Test that both Filename and document_type are added."""
-        transactions = [
-            {"Date": "01/12/2023", "Details": "Test", "source_pdf": "test.pdf"}
-        ]
-
-        captured_input = []
-
-        def capture_input(txns):
-            captured_input.extend(txns)
-            return (txns, [])
-
-        mock_duplicate_detector.detect_and_separate.side_effect = capture_input
-
-        orchestrator = TransactionProcessingOrchestrator(
-            duplicate_detector=mock_duplicate_detector,
-            sorting_service=mock_sorting_service,
-        )
-
-        orchestrator.process_transaction_group(transactions)
-
-        # Both fields should be present
-        assert captured_input[0]["Filename"] == "test.pdf"
-        assert captured_input[0]["document_type"] == "bank_statement"
-
-    def test_enrichment_preserves_existing_document_type(
-        self, mock_duplicate_detector, mock_sorting_service
-    ):
-        """Test that existing document_type from extraction is preserved."""
-        transactions = [
-            {
-                "Date": "01/12/2023",
-                "Details": "Card Purchase",
-                "Filename": "card.pdf",
-                "document_type": "credit_card_statement",  # Already set by extractor
-            }
-        ]
-
-        captured_input = []
-
-        def capture_input(txns):
-            captured_input.extend(txns)
-            return (txns, [])
-
-        mock_duplicate_detector.detect_and_separate.side_effect = capture_input
-
-        orchestrator = TransactionProcessingOrchestrator(
-            duplicate_detector=mock_duplicate_detector,
-            sorting_service=mock_sorting_service,
-        )
-
-        orchestrator.process_transaction_group(transactions)
-
-        # Should preserve credit_card_statement, not override with default
-        assert captured_input[0]["document_type"] == "credit_card_statement"
-
-
 class TestGroupByIBAN:
-    """Test IBAN grouping."""
-
     def test_delegates_to_grouping_service(self, orchestrator):
-        """Test that group_by_iban delegates to grouping service."""
         transactions = [
             {"Date": "01/12/2023", "Details": "Test", "Filename": "test1.pdf"},
             {"Date": "02/12/2023", "Details": "Test2", "Filename": "test2.pdf"},
         ]
         pdf_ibans = {"test1.pdf": "IE12345", "test2.pdf": "IE67890"}
 
-        # Mock the grouping service
         orchestrator.grouping_service = Mock()
         orchestrator.grouping_service.group_by_iban.return_value = {
             "IE12345": [transactions[0]],
@@ -263,47 +53,8 @@ def test_delegates_to_grouping_service(self, orchestrator):
 
         result = orchestrator.group_by_iban(transactions, pdf_ibans)
 
-        # Verify delegation
         orchestrator.grouping_service.group_by_iban.assert_called_once_with(
             transactions, pdf_ibans
         )
         assert "IE12345" in result
         assert "IE67890" in result
-
-
-class TestEnrichmentIntegration:
-    """Integration tests for enrichment in processing pipeline."""
-
-    def test_full_pipeline_with_document_types(
-        self, mock_duplicate_detector, mock_sorting_service
-    ):
-        """Test full processing pipeline preserves document types."""
-        transactions = [
-            {
-                "Date": "01/12/2023",
-                "Details": "Bank",
-                "source_pdf": "bank.pdf",
-                "document_type": "bank_statement",
-            },
-            {
-                "Date": "02/12/2023",
-                "Details": "Card",
-                "source_pdf": "card.pdf",
-                "document_type": "credit_card_statement",
-            },
-        ]
-
-        # Setup mocks to pass through
-        mock_duplicate_detector.detect_and_separate.return_value = (transactions, [])
-        mock_sorting_service.sort.side_effect = lambda x: x
-
-        orchestrator = TransactionProcessingOrchestrator(
-            duplicate_detector=mock_duplicate_detector,
-            sorting_service=mock_sorting_service,
-        )
-
-        unique, duplicates = orchestrator.process_transaction_group(transactions)
-
-        # Verify document types preserved through pipeline
-        assert unique[0]["document_type"] == "bank_statement"
-        assert unique[1]["document_type"] == "credit_card_statement"

From 0635820829468fe6b828274f05bce33e2053343c Mon Sep 17 00:00:00 2001
From: longieirl <noreply@github.com>
Date: Wed, 25 Mar 2026 14:59:45 +0000
Subject: [PATCH 3/5] chore: untrack logs/ dirs and broaden gitignore to
 **/logs/

---
 .gitignore                                         |  4 ++--
 .../parser-free/logs/processing_activity.jsonl     | 14 --------------
 2 files changed, 2 insertions(+), 16 deletions(-)
 delete mode 100644 packages/parser-free/logs/processing_activity.jsonl

diff --git a/.gitignore b/.gitignore
index 1fdd1ba..7b6fe98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -121,8 +121,8 @@ output/
 **/output/
 
 # Logs
-logs/*.log
-logs/processing_activity.jsonl
+logs/
+**/logs/
 *.log
 
 # Development Artifacts
diff --git a/packages/parser-free/logs/processing_activity.jsonl b/packages/parser-free/logs/processing_activity.jsonl
deleted file mode 100644
index 676a79f..0000000
--- a/packages/parser-free/logs/processing_activity.jsonl
+++ /dev/null
@@ -1,14 +0,0 @@
-{"timestamp": "2026-03-18T12:32:34.490093", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:32:34.493341", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:32:34.502456", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:32:34.510783", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:32:34.518456", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:32:34.522150", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:32:34.527303", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.790615", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.794068", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.805428", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.813279", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.819899", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.826404", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}
-{"timestamp": "2026-03-18T12:34:07.830051", "event_type": "processing", "pdf_count": 0, "pages_read": 0, "transaction_count": 0, "duplicate_count": 0, "output_formats": ["csv", "json", "excel"], "duration_seconds": 0.0}

From b21bbaa4fc55a448c273d0e2c3f3693348a88ef2 Mon Sep 17 00:00:00 2001
From: longieirl <noreply@github.com>
Date: Wed, 25 Mar 2026 15:02:44 +0000
Subject: [PATCH 4/5] style: fix isort ordering in service_registry.py

---
 .../src/bankstatements_core/services/service_registry.py      | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/packages/parser-core/src/bankstatements_core/services/service_registry.py b/packages/parser-core/src/bankstatements_core/services/service_registry.py
index 54d179d..52acfaa 100644
--- a/packages/parser-core/src/bankstatements_core/services/service_registry.py
+++ b/packages/parser-core/src/bankstatements_core/services/service_registry.py
@@ -95,9 +95,7 @@ def from_config(
             Fully wired ServiceRegistry instance.
         """
         from bankstatements_core.config.column_config import get_column_names
-        from bankstatements_core.patterns.strategies import (
-            AllFieldsDuplicateStrategy,
-        )
+        from bankstatements_core.patterns.strategies import AllFieldsDuplicateStrategy
         from bankstatements_core.processor import find_matching_columns
         from bankstatements_core.services.duplicate_detector import (
             DuplicateDetectionService,

From 3ca3969b67c8fdcb68b4de5c994eb2d04fb1c5d6 Mon Sep 17 00:00:00 2001
From: longieirl <noreply@github.com>
Date: Wed, 25 Mar 2026 15:09:42 +0000
Subject: [PATCH 5/5] style: fix isort ordering in processor.py

---
 packages/parser-core/src/bankstatements_core/processor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/parser-core/src/bankstatements_core/processor.py b/packages/parser-core/src/bankstatements_core/processor.py
index 5ee3358..4f524f6 100644
--- a/packages/parser-core/src/bankstatements_core/processor.py
+++ b/packages/parser-core/src/bankstatements_core/processor.py
@@ -22,13 +22,13 @@
 from bankstatements_core.services.pdf_processing_orchestrator import (
     PDFProcessingOrchestrator,
 )
+from bankstatements_core.services.service_registry import ServiceRegistry
 from bankstatements_core.services.sorting_service import (
     ChronologicalSortingStrategy,
     NoSortingStrategy,
     TransactionSortingService,
 )
 from bankstatements_core.services.transaction_filter import TransactionFilterService
-from bankstatements_core.services.service_registry import ServiceRegistry
 from bankstatements_core.services.transaction_processing_orchestrator import (
     TransactionProcessingOrchestrator,
 )