diff --git a/pyproject.toml b/pyproject.toml
index 7d7d91b..a458259 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,7 @@ dependencies = [
     "rapidfuzz~=3.12.2",
     "pymupdf~=1.26.3",
     "openai",
+    "docling>=2.75.0",
 ]
 
 [build-system]
diff --git a/transaction_parser/hooks.py b/transaction_parser/hooks.py
index d389f4f..a944370 100644
--- a/transaction_parser/hooks.py
+++ b/transaction_parser/hooks.py
@@ -27,3 +27,8 @@
         "on_update": "transaction_parser.transaction_parser.overrides.communication.on_update",
     }
 }
+
+pdf_processors = {
+    "OCRMyPDF": "transaction_parser.transaction_parser.utils.pdf_processor.OCRMyPDFProcessor",
+    "Docling": "transaction_parser.transaction_parser.utils.pdf_processor.DoclingPDFProcessor",
+}
diff --git a/transaction_parser/patches.txt b/transaction_parser/patches.txt
index 8096de1..27002f4 100644
--- a/transaction_parser/patches.txt
+++ b/transaction_parser/patches.txt
@@ -4,4 +4,5 @@
 
 [post_model_sync]
 # Patches added in this section will be executed after doctypes are migrated
-execute:from transaction_parser.install import after_install; after_install() #2
\ No newline at end of file
+execute:from transaction_parser.install import after_install; after_install() #2
+transaction_parser.patches.set_default_pdf_processor #1
diff --git a/transaction_parser/patches/__init__.py b/transaction_parser/patches/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/patches/set_default_pdf_processor.py b/transaction_parser/patches/set_default_pdf_processor.py
new file mode 100644
index 0000000..3d9fad3
--- /dev/null
+++ b/transaction_parser/patches/set_default_pdf_processor.py
@@ -0,0 +1,13 @@
+import frappe
+
+from transaction_parser.transaction_parser.utils.pdf_processor import (
+    DEFAULT_PDF_PROCESSOR,
+)
+
+
+def execute():
+    """Fill in the default PDF processor when the setting has no value yet."""
+    doctype, fieldname = "Transaction Parser Settings", "pdf_processor"
+    if frappe.db.get_single_value(doctype, fieldname):
+        return
+    frappe.db.set_single_value(doctype, fieldname, DEFAULT_PDF_PROCESSOR)
diff --git a/transaction_parser/transaction_parser/__init__.py b/transaction_parser/transaction_parser/__init__.py
index 83555ce..abd27da 100644
--- a/transaction_parser/transaction_parser/__init__.py
+++ b/transaction_parser/transaction_parser/__init__.py
@@ -28,6 +28,7 @@ def parse(transaction, country, file_url, ai_model=None, page_limit=None):
         ai_model=cstr(ai_model),
         page_limit=cint(page_limit),
         queue="long",
+        now=frappe.conf.developer_mode,
     )
 
 
diff --git a/transaction_parser/transaction_parser/ai_integration/parser.py b/transaction_parser/transaction_parser/ai_integration/parser.py
index f74eec2..6db6d3f 100644
--- a/transaction_parser/transaction_parser/ai_integration/parser.py
+++ b/transaction_parser/transaction_parser/ai_integration/parser.py
@@ -132,7 +132,7 @@ def get_api_key(self) -> str:
             _("API Key not found for model {0}").format(self.model.service_provider)
         )
 
-    def get_content(self, response: dict) -> dict | str:
+    def get_content(self, response: dict) -> dict:
         """Extract content from API response."""
         content = response["choices"][0]["message"]["content"]
 
diff --git a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json
index 73a5a16..53dc83c 100644
--- a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json
+++ b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json
@@ -9,6 +9,7 @@
   "enabled",
   "ai_model_section",
   "default_ai_model",
+  "pdf_processor",
   "api_keys",
   "transaction_configurations_section",
   "invoice_lookback_count",
@@ -92,6 +93,15 @@
    "mandatory_depends_on": "eval: doc.enabled",
    "options": "DeepSeek Chat\nDeepSeek Reasoner\nOpenAI gpt-4o\nOpenAI gpt-4o-mini\nOpenAI gpt-5\nOpenAI gpt-5-mini\nGoogle Gemini Pro\nGoogle Gemini Flash"
   },
+  {
+   "default": "OCRMyPDF",
+   "depends_on": "eval: doc.enabled",
+   "description": "Select the library to use for PDF text extraction",
+   "fieldname": "pdf_processor",
+   "fieldtype": "Select",
+   "label": "PDF Processor",
+   "options": "OCRMyPDF\nDocling"
+  },
   {
    "depends_on": "eval: doc.enabled",
    "fieldname": "api_keys",
@@ -156,7 +166,7 @@
  "index_web_pages_for_search": 1,
  "issingle": 1,
  "links": [],
- "modified": "2025-09-08 08:48:58.870032",
+ "modified": "2026-03-14 13:35:17.150533",
  "modified_by": "Administrator",
  "module": "Transaction Parser",
  "name": "Transaction Parser Settings",
@@ -177,4 +187,4 @@
  "sort_field": "modified",
  "sort_order": "DESC",
  "states": []
-}
\ No newline at end of file
+}
diff --git a/transaction_parser/transaction_parser/utils/file_processor.py b/transaction_parser/transaction_parser/utils/file_processor.py
index 8a3ccd9..3cebb9b 100644
--- a/transaction_parser/transaction_parser/utils/file_processor.py
+++ b/transaction_parser/transaction_parser/utils/file_processor.py
@@ -1,76 +1,104 @@
 import io
 
 import frappe
-import ocrmypdf
-import pymupdf
 from frappe import _
+from frappe.core.doctype.file.file import File
 from frappe.utils.csvutils import read_csv_content
 from frappe.utils.xlsxutils import (
     read_xls_file_from_attached_file,
     read_xlsx_file_from_attached_file,
 )
 
+from transaction_parser.transaction_parser.utils.pdf_processor import (
+    PDFProcessor,
+    get_pdf_processor,
+)
 
-class FileProcessor:
-    """Process files: PDF (trim pages, apply OCR), CSV/Excel (parse data), extract content."""
 
-    def get_content(self, doc, page_limit=None):
+class FileProcessor:
+    """
+    Process files: PDF (trim pages, apply OCR), CSV/Excel (parse data), extract content.
+    """
+
+    def get_content(
+        self,
+        doc: File,
+        page_limit: int | None = None,
+        pdf_processor: PDFProcessor | None = None,
+    ) -> str | None:
         if doc.file_type == "PDF":
-            return self._process_pdf(doc, page_limit)
-        elif doc.file_type in ["CSV", "XLSX", "XLS"]:
-            return self._process_spreadsheet(doc)
-        else:
-            frappe.throw(_("Only PDF, CSV, and Excel files are supported"))
+            return self.process_pdf(doc, page_limit, pdf_processor)
 
-    def _process_pdf(self, doc, page_limit=None):
-        """Process PDF files with OCR and page limiting."""
-        self.file = io.BytesIO(doc.get_content())
-        self._remove_extra_pages(page_limit)
-        self._apply_ocr()
-        return self._get_text()
+        if doc.file_type in ("CSV", "XLSX", "XLS"):
+            return self.process_spreadsheet(doc)
 
-    def _process_spreadsheet(self, doc):
-        """Process CSV and Excel files."""
+        frappe.throw(
+            title=_("Unsupported File Type"),
+            msg=_("Only PDF, CSV, and Excel files are supported"),
+        )
+
+    def process_pdf(
+        self,
+        doc: File,
+        page_limit: int | None = None,
+        pdf_processor: PDFProcessor | None = None,
+    ) -> str:
+        """
+        Extract PDF text with `pdf_processor`, or the configured processor if omitted.
+        """
+        processor = pdf_processor if pdf_processor else get_pdf_processor()
+        return processor.process(doc, page_limit)
+
+    def process_spreadsheet(self, doc: File) -> str:
+        """
+        Parse a CSV/XLSX/XLS file into rows and return them formatted as text.
+        """
         file_content = doc.get_content()
 
         if doc.file_type == "CSV":
-            file_content_str = self._decode_csv_content(file_content)
+            file_content_str = self.decode_csv_content(file_content)
             rows = read_csv_content(file_content_str)
         elif doc.file_type == "XLSX":
             rows = read_xlsx_file_from_attached_file(fcontent=file_content)
         elif doc.file_type == "XLS":
             rows = read_xls_file_from_attached_file(file_content)
+        else:
+            frappe.throw(
+                title=_("Unsupported File Type"),
+                msg=_(
+                    "Cannot process spreadsheet with file type: {0}. <br> Supported types are CSV, XLSX, and XLS."
+                ).format(doc.file_type),
+            )
 
         # Convert rows to a formatted string representation
-        return self._format_rows_as_text(rows)
+        return self.format_rows_as_text(rows)
 
-    def _decode_csv_content(self, content):
-        """Decode CSV file content with fallback encodings."""
+    def decode_csv_content(self, content: str | bytes) -> str:
+        """
+        Decode CSV bytes with fallback encodings, dropping a UTF-8 BOM if present.
+        """
         # If content is already a string, return as-is
         if isinstance(content, str):
             return content
 
         # If content is bytes, decode it
-        encodings = ["utf-8", "utf-8-sig", "latin1", "cp1252"]
-
-        for encoding in encodings:
+        # `utf-8-sig` also reads BOM-less UTF-8; `latin1` accepts any bytes, so it stays last.
+        for encoding in ("utf-8-sig", "cp1252", "latin1"):
             try:
                 return content.decode(encoding)
             except UnicodeDecodeError:
                 continue
 
-        # If all encodings fail, try with error handling
-        try:
-            return content.decode("utf-8", errors="replace")
-        except Exception:
-            frappe.throw(
-                _(
-                    "Unable to decode CSV file. Please ensure the file is saved with a supported encoding."
-                )
+        frappe.throw(
+            _(
+                "Unable to decode CSV file. Please ensure the file is saved with a supported encoding."
             )
+        )
 
-    def _format_rows_as_text(self, rows):
-        """Convert rows to a text format suitable for AI processing."""
+    def format_rows_as_text(self, rows: list) -> str:
+        """
+        Convert rows to a text format suitable for AI processing.
+        """
         if not rows:
             frappe.throw(_("No data found in the file."))
 
@@ -106,56 +134,3 @@ def _format_rows_as_text(self, rows):
         text_parts.append(f"Total columns: {len(rows[0])}")
 
         return "\n".join(text_parts)
-
-    def _remove_extra_pages(self, page_limit=None):
-        if not page_limit:
-            return
-
-        input_pdf = pymupdf.open(stream=self.file, filetype="pdf")
-        output_pdf = pymupdf.open()
-        output_pdf.insert_pdf(input_pdf, to_page=page_limit - 1)
-
-        temp_file = io.BytesIO()
-        output_pdf.save(temp_file)
-
-        output_pdf.close()
-        input_pdf.close()
-
-        self.file = temp_file
-        self.file.seek(0)
-
-    def _apply_ocr(self):
-        doc = pymupdf.open(stream=self.file, filetype="pdf")
-        pages_to_ocr = [
-            str(i) for i, page in enumerate(doc, 1) if not page.get_text("text").strip()
-        ]
-
-        if not pages_to_ocr:
-            return
-
-        pages = ",".join(pages_to_ocr)
-
-        temp_file = io.BytesIO()
-        self.file.seek(0)
-
-        ocrmypdf.ocr(
-            input_file=self.file,
-            output_file=temp_file,
-            pages=pages,
-            progress_bar=False,
-            rotate_pages=True,
-            force_ocr=True,
-        )
-
-        self.file = temp_file
-        self.file.seek(0)
-
-    def _get_text(self):
-        text = ""
-        doc = pymupdf.open(stream=self.file, filetype="pdf")
-        for page in doc:
-            text += page.get_text("text")
-
-        doc.close()
-
-        return text
diff --git a/transaction_parser/transaction_parser/utils/pdf_processor.py b/transaction_parser/transaction_parser/utils/pdf_processor.py
new file mode 100644
index 0000000..8d0df88
--- /dev/null
+++ b/transaction_parser/transaction_parser/utils/pdf_processor.py
@@ -0,0 +1,223 @@
+import io
+from abc import ABC, abstractmethod
+
+import frappe
+import pymupdf
+from frappe import _
+from frappe.core.doctype.file.file import File
+
+DEFAULT_PDF_PROCESSOR = "OCRMyPDF"
+
+
+class PDFProcessor(ABC):
+    """
+    Abstract base class for PDF processors.
+
+    To add a new processor from another app:
+
+    1. Subclass PDFProcessor
+    2. Implement the `process` method
+    3. Register it via the `pdf_processors` hook in your app's hooks.py:
+
+    ```
+    pdf_processors = {
+        "MyProcessor": "my_app.utils.pdf_processor.MyPDFProcessor",
+    }
+    ```
+    """
+
+    @abstractmethod
+    def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
+        """
+        Process a PDF file and return extracted text.
+
+        Args:
+            file: PDF file as BytesIO stream or Frappe File document
+            page_limit: Maximum number of pages to process (None = all pages)
+
+        Returns:
+            Extracted text content from the PDF
+        """
+        pass
+
+    def get_sanitized_file(
+        self, file: io.BytesIO | File, page_limit: int | None = None
+    ) -> io.BytesIO:
+        """
+        Get file as BytesIO stream and trim pages if needed.
+        """
+        if isinstance(file, File):
+            file = io.BytesIO(file.get_content())
+
+        return self.trim_pages(file, page_limit)
+
+    def trim_pages(self, file: io.BytesIO, page_limit: int | None = None) -> io.BytesIO:
+        """
+        Return `file` cut down to its first `page_limit` pages, rewound to the start.
+
+        The original stream is returned when no trimming is needed.
+        """
+        if not page_limit or page_limit <= 0:
+            file.seek(0)
+            return file
+
+        # Context managers close both documents even if copying or saving raises.
+        with pymupdf.open(stream=file, filetype="pdf") as input_pdf:
+            if input_pdf.page_count <= page_limit:
+                file.seek(0)
+                return file
+
+            temp_file = io.BytesIO()
+            with pymupdf.open() as output_pdf:
+                output_pdf.insert_pdf(input_pdf, to_page=page_limit - 1)
+                output_pdf.save(temp_file)
+
+        temp_file.seek(0)
+        return temp_file
+
+    def get_text(self, file: io.BytesIO) -> str:
+        """
+        Return the text of every page in `file`, concatenated in page order.
+
+        The document is closed even when text extraction raises.
+        """
+        with pymupdf.open(stream=file, filetype="pdf") as doc:
+            page_texts = [page.get_text("text") for page in doc]
+
+        return "".join(page_texts)
+
+
+class DoclingPDFProcessor(PDFProcessor):
+    """
+    PDF processor that converts documents with Docling and returns Markdown.
+
+    The converter is built once and cached on the class. OCR is currently
+    turned off in its pipeline options (see the TODO in `_get_converter`).
+    """
+
+    _converter = None  # shared DocumentConverter, created on first use
+
+    def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
+        from docling.datamodel.base_models import ConversionStatus, DocumentStream
+
+        file = self.get_sanitized_file(file, page_limit)
+
+        source = DocumentStream(name="document.pdf", stream=file)  # placeholder name
+        converter = self._get_converter()
+        result = converter.convert(source)
+
+        if (
+            not result
+            or not result.document
+            or result.status
+            not in (
+                ConversionStatus.SUCCESS,
+                ConversionStatus.PARTIAL_SUCCESS,
+            )
+        ):
+            frappe.throw(
+                title=_("PDF Reading Failed"),
+                msg=_("Docling failed to read the document."),
+            )
+
+        return result.document.export_to_markdown()
+
+    def _get_converter(self):
+        if DoclingPDFProcessor._converter is None:
+            from docling.datamodel.base_models import InputFormat
+            from docling.datamodel.pipeline_options import PdfPipelineOptions
+            from docling.document_converter import DocumentConverter, PdfFormatOption
+
+            pipeline_options = PdfPipelineOptions()
+            pipeline_options.do_ocr = False  # TODO: OCR Setup
+
+            DoclingPDFProcessor._converter = DocumentConverter(
+                format_options={
+                    InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
+                }
+            )
+
+        return DoclingPDFProcessor._converter
+
+
+class OCRMyPDFProcessor(PDFProcessor):
+    """
+    PDF processor using PyMuPDF for text extraction and OCRMyPDF for OCR.
+    """
+
+    def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
+        file = self.get_sanitized_file(file, page_limit)
+        file = self.apply_ocr(file)
+
+        return self.get_text(file)
+
+    def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
+        """
+        OCR the text-less pages of `file`; returns `file` itself if there are none.
+        """
+        import ocrmypdf
+
+        # The context manager closes the document even if text extraction raises.
+        with pymupdf.open(stream=file, filetype="pdf") as doc:
+            pages_to_ocr = [
+                str(page_no)
+                for page_no, page in enumerate(doc, 1)
+                if not page.get_text("text").strip()
+            ]
+        file.seek(0)
+
+        if not pages_to_ocr:
+            return file
+
+        temp_file = io.BytesIO()
+        ocrmypdf.ocr(
+            input_file=file,
+            output_file=temp_file,
+            pages=",".join(pages_to_ocr),
+            progress_bar=False,
+            rotate_pages=True,
+            force_ocr=True,
+        )
+        temp_file.seek(0)
+        return temp_file
+
+
+def get_pdf_processor(name: str | None = None) -> PDFProcessor:
+    """
+    Return an instance of the PDF processor registered under `name`.
+
+    Without `name`, the `pdf_processor` setting (or `DEFAULT_PDF_PROCESSOR`) is used.
+
+    ```
+    processor = get_pdf_processor("Docling")
+    text = processor.process(file, page_limit=5)
+    ```
+
+    Other apps can register a processor in their hooks.py:
+
+    ```
+    pdf_processors = {
+        "MyProcessor": "my_app.utils.pdf_processor.MyPDFProcessor",
+    }
+    ```
+    """
+    selected = (
+        name
+        or frappe.db.get_single_value("Transaction Parser Settings", "pdf_processor")
+        or DEFAULT_PDF_PROCESSOR
+    )
+    registry = frappe.get_hooks("pdf_processors") or {}
+
+    # Take the last entry so the app latest in hook resolution order wins.
+    candidates = registry.get(selected) or [None]
+    class_path = candidates[-1]
+
+    if not class_path:
+        frappe.throw(
+            title=_("Unsupported PDF Processor"),
+            msg=_("PDF Processor '{0}' is not supported. <br>Choose from: {1}").format(
+                selected, ", ".join(registry)
+            ),
+        )
+
+    return frappe.get_attr(class_path)()