Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,6 @@ tmp/
*.tgz
*.rar
*.7z

# GSD planning artifacts
.planning/
1 change: 1 addition & 0 deletions packages/parser-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ markers = [
]
filterwarnings = [
"ignore:TestResult has no addDuration method:RuntimeWarning",
"ignore::DeprecationWarning:bankstatements_core.pdf_table_extractor",
]

[tool.coverage.run]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ def _extract_page(self, page: Any, page_num: int) -> list[dict] | None:
page_rows = self._row_builder.build_rows(words)

if self.page_validation_enabled:
from bankstatements_core.pdf_table_extractor import validate_page_structure
from bankstatements_core.extraction.validation_facade import (
validate_page_structure,
)

if not validate_page_structure(page_rows, self.columns):
logger.info(
Expand All @@ -153,7 +155,9 @@ def _extract_page(self, page: Any, page_num: int) -> list[dict] | None:
)
return None

from bankstatements_core.pdf_table_extractor import merge_continuation_lines
from bankstatements_core.extraction.validation_facade import (
merge_continuation_lines,
)

return merge_continuation_lines(page_rows, self.columns)

Expand Down Expand Up @@ -184,7 +188,9 @@ def _determine_boundaries_and_extract(
all_words = initial_area.extract_words(use_text_flow=True)

if self.header_check_enabled:
from bankstatements_core.pdf_table_extractor import detect_table_headers
from bankstatements_core.extraction.validation_facade import (
detect_table_headers,
)

header_top = (
header_check_top_y
Expand All @@ -198,7 +204,7 @@ def _determine_boundaries_and_extract(
logger.info(f"Page {page_num}: No table headers detected, skipping")
return None

from bankstatements_core.pdf_table_extractor import (
from bankstatements_core.extraction.extraction_facade import (
detect_table_end_boundary_smart,
)

Expand All @@ -225,7 +231,9 @@ def _determine_boundaries_and_extract(
words = table_area.extract_words(use_text_flow=True)

if self.header_check_enabled:
from bankstatements_core.pdf_table_extractor import detect_table_headers
from bankstatements_core.extraction.validation_facade import (
detect_table_headers,
)

header_top = (
header_check_top_y
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from typing import TYPE_CHECKING, Any

from bankstatements_core.config.app_config import AppConfig, ConfigurationError
from bankstatements_core.config.column_config import get_columns_config
from bankstatements_core.entitlements import EntitlementError, Entitlements
from bankstatements_core.pdf_table_extractor import get_columns_config

if TYPE_CHECKING:
from bankstatements_core.processor import BankStatementProcessor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,20 @@
from __future__ import annotations

import logging
import warnings

import pdfplumber # noqa: F401 - used by extraction module

# Emit a DeprecationWarning as an import-time side effect: the message below
# describes this module as a backward-compatibility shim and names the facade
# modules that callers should import from instead.
# stacklevel=2 attributes the warning to the code that imported this module
# rather than to this file, so the report points at the import to be changed.
# NOTE(review): because of that attribution, a module-scoped warning filter
# keyed on this module's own name may not match the warning — confirm against
# the project's filterwarnings configuration.
warnings.warn(
"bankstatements_core.pdf_table_extractor is a backward-compatibility shim "
"and will be removed in a future version. "
"Import directly from bankstatements_core.extraction.extraction_facade, "
"bankstatements_core.extraction.validation_facade, or "
"bankstatements_core.extraction.row_classification_facade instead.",
DeprecationWarning,
stacklevel=2,
)

logger = logging.getLogger(__name__)

# Re-export column configuration (backward compatibility)
Expand Down
Loading
Loading