diff --git a/transaction_parser/exceptions.py b/transaction_parser/exceptions.py
new file mode 100644
index 0000000..a6458d7
--- /dev/null
+++ b/transaction_parser/exceptions.py
@@ -0,0 +1,5 @@
+import frappe
+
+
+class FileProcessingError(frappe.ValidationError):
+ """Custom exception for file processing errors."""
diff --git a/transaction_parser/hooks.py b/transaction_parser/hooks.py
index a944370..15e7678 100644
--- a/transaction_parser/hooks.py
+++ b/transaction_parser/hooks.py
@@ -32,3 +32,14 @@
"OCRMyPDF": "transaction_parser.transaction_parser.utils.pdf_processor.OCRMyPDFProcessor",
"Docling": "transaction_parser.transaction_parser.utils.pdf_processor.DoclingPDFProcessor",
}
+
+export_python_type_annotations = True
+
+scheduler_events = {
+ "cron": {
+ # at 2:00 am every day
+ "0 2 * * *": [
+ "transaction_parser.parser_benchmark.doctype.parser_benchmark_settings.parser_benchmark_settings.run_scheduled_benchmarks",
+ ],
+ }
+}
diff --git a/transaction_parser/install.py b/transaction_parser/install.py
index a8ef3fd..8e6f35c 100644
--- a/transaction_parser/install.py
+++ b/transaction_parser/install.py
@@ -20,7 +20,7 @@
"label": "Is Created By Transaction Parser",
"read_only": 1,
"insert_after": "is_internal_supplier",
- }
+ },
],
"Communication": [
{
diff --git a/transaction_parser/modules.txt b/transaction_parser/modules.txt
index c672949..d31c756 100644
--- a/transaction_parser/modules.txt
+++ b/transaction_parser/modules.txt
@@ -1 +1,2 @@
-Transaction Parser
\ No newline at end of file
+Transaction Parser
+Parser Benchmark
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/__init__.py b/transaction_parser/parser_benchmark/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/__init__.py b/transaction_parser/parser_benchmark/doctype/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.js b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.js
new file mode 100644
index 0000000..39b8605
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.js
@@ -0,0 +1,54 @@
+// Copyright (c) 2026, Resilient Tech and contributors
+// For license information, please see license.txt
+
+frappe.ui.form.on("Parser Benchmark Dataset", {
+ refresh(frm) {
+ if (frm.doc.docstatus === 1 && frm.doc.enabled) {
+ frm.add_custom_button(__("Run Benchmark"), () => run_benchmark(frm));
+ }
+
+ if (frm.doc.docstatus === 0) set_party_type(frm);
+ },
+
+ transaction_type(frm) {
+ set_party_type(frm);
+ },
+});
+
+function run_benchmark(frm) {
+ frappe.call({
+ method: "transaction_parser.parser_benchmark.doctype.parser_benchmark_dataset.parser_benchmark_dataset.run_benchmark",
+ args: { dataset_name: frm.doc.name },
+ freeze: true,
+ freeze_message: __("Queuing benchmarks..."),
+ callback(r) {
+ if (r.message && r.message.length) {
+ frappe.show_alert({
+ message: __("{0} benchmark(s) queued.", [r.message.length]),
+ indicator: "green",
+ });
+ frappe.set_route("List", "Parser Benchmark Log", {
+ dataset: frm.doc.name,
+ status: "Queued",
+ });
+ } else {
+ frappe.show_alert({
+ message: __("No benchmarks queued. Check model/processor selections."),
+ indicator: "red",
+ });
+ }
+ },
+ });
+}
+
+const PARTY_TYPE_MAP = {
+ "Sales Order": "Customer",
+ Expense: "Supplier",
+};
+
+function set_party_type(frm) {
+ const party_type = PARTY_TYPE_MAP[frm.doc.transaction_type];
+ if (party_type && frm.doc.party_type !== party_type) {
+ frm.set_value("party_type", party_type);
+ }
+}
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json
new file mode 100644
index 0000000..86fdcfb
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json
@@ -0,0 +1,320 @@
+{
+ "actions": [],
+ "autoname": "naming_series:",
+ "creation": "2026-03-16 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "enabled",
+ "column_break_title",
+ "files_section",
+ "files",
+ "is_multiple_files",
+ "section_break_txfs",
+ "transaction_type",
+ "column_break_xusg",
+ "country",
+ "section_break_sobg",
+ "company",
+ "column_break_qccq",
+ "party_type",
+ "party",
+ "processing_section",
+ "column_break_ai_models",
+ "openai_gpt_4o_mini",
+ "openai_gpt_4o",
+ "openai_gpt_5_mini",
+ "openai_gpt_5",
+ "column_break_leid",
+ "deepseek_chat",
+ "deepseek_reasoner",
+ "column_break_zgpf",
+ "google_gemini_flash_25",
+ "google_gemini_pro_25",
+ "pdf_processor_section",
+ "ocrmypdf",
+ "docling",
+ "column_break_pdf_processor",
+ "other_config_section",
+ "page_limit",
+ "column_break_jjht",
+ "expected_fields_section",
+ "expected_fields",
+ "section_break_bhsg",
+ "naming_series",
+ "column_break_aoce",
+ "amended_from"
+ ],
+ "fields": [
+ {
+ "fieldname": "column_break_title",
+ "fieldtype": "Column Break"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "1",
+ "fieldname": "enabled",
+ "fieldtype": "Check",
+ "in_list_view": 1,
+ "in_standard_filter": 1,
+ "label": "Enabled"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "is_multiple_files",
+ "fieldtype": "Check",
+ "in_standard_filter": 1,
+ "label": "Multiple Files",
+ "read_only": 1
+ },
+ {
+ "fieldname": "transaction_type",
+ "fieldtype": "Select",
+ "in_list_view": 1,
+ "label": "Transaction Type",
+ "options": "Sales Order\nExpense",
+ "reqd": 1
+ },
+ {
+ "default": "Other",
+ "fieldname": "country",
+ "fieldtype": "Select",
+ "label": "Country",
+ "options": "India\nOther",
+ "reqd": 1
+ },
+ {
+ "fieldname": "company",
+ "fieldtype": "Link",
+ "label": "Company",
+ "options": "Company"
+ },
+ {
+ "fieldname": "party_type",
+ "fieldtype": "Link",
+ "hidden": 1,
+ "label": "Party Type",
+ "options": "DocType"
+ },
+ {
+ "depends_on": "eval: doc.party_type",
+ "fieldname": "party",
+ "fieldtype": "Dynamic Link",
+ "label": "Party",
+ "options": "party_type"
+ },
+ {
+ "fieldname": "files_section",
+ "fieldtype": "Section Break",
+ "label": "Files"
+ },
+ {
+ "allow_on_submit": 1,
+ "fieldname": "files",
+ "fieldtype": "Table",
+ "label": "Files",
+ "options": "Parser Benchmark Dataset File",
+ "reqd": 1
+ },
+ {
+ "fieldname": "processing_section",
+ "fieldtype": "Section Break",
+ "label": "AI Models"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "deepseek_chat",
+ "fieldtype": "Check",
+ "label": "DeepSeek Chat"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "deepseek_reasoner",
+ "fieldtype": "Check",
+ "label": "DeepSeek Reasoner"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "openai_gpt_4o",
+ "fieldtype": "Check",
+ "label": "OpenAI gpt-4o"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "openai_gpt_4o_mini",
+ "fieldtype": "Check",
+ "label": "OpenAI gpt-4o-mini"
+ },
+ {
+ "fieldname": "column_break_ai_models",
+ "fieldtype": "Column Break"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "1",
+ "fieldname": "openai_gpt_5",
+ "fieldtype": "Check",
+ "label": "OpenAI gpt-5"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "openai_gpt_5_mini",
+ "fieldtype": "Check",
+ "label": "OpenAI gpt-5-mini"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "google_gemini_pro_25",
+ "fieldtype": "Check",
+ "label": "Google Gemini Pro-2.5"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "google_gemini_flash_25",
+ "fieldtype": "Check",
+ "label": "Google Gemini Flash-2.5"
+ },
+ {
+ "fieldname": "pdf_processor_section",
+ "fieldtype": "Section Break",
+ "label": "PDF Processors"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "1",
+ "fieldname": "ocrmypdf",
+ "fieldtype": "Check",
+ "label": "OCRMyPDF"
+ },
+ {
+ "fieldname": "column_break_pdf_processor",
+ "fieldtype": "Column Break"
+ },
+ {
+ "allow_on_submit": 1,
+ "default": "0",
+ "fieldname": "docling",
+ "fieldtype": "Check",
+ "label": "Docling"
+ },
+ {
+ "fieldname": "other_config_section",
+ "fieldtype": "Section Break",
+ "label": "Other Configuration"
+ },
+ {
+ "allow_on_submit": 1,
+ "fieldname": "page_limit",
+ "fieldtype": "Int",
+ "label": "Page Limit",
+ "non_negative": 1
+ },
+ {
+ "fieldname": "expected_fields_section",
+ "fieldtype": "Section Break",
+ "label": "Expected Fields"
+ },
+ {
+ "allow_on_submit": 1,
+ "description": "Add one row per response key you want to score. Pick the key and paste only its expected JSON value.",
+ "fieldname": "expected_fields",
+ "fieldtype": "Table",
+ "label": "Expected Fields",
+ "options": "Parser Benchmark Expected Field"
+ },
+ {
+ "collapsible": 1,
+ "fieldname": "section_break_bhsg",
+ "fieldtype": "Section Break"
+ },
+ {
+ "default": "PAR-BM-DTS-",
+ "fieldname": "naming_series",
+ "fieldtype": "Select",
+ "label": "Naming Series",
+ "options": "PAR-BM-DTS-"
+ },
+ {
+ "fieldname": "column_break_leid",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "column_break_jjht",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "column_break_zgpf",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "section_break_sobg",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "column_break_qccq",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "amended_from",
+ "fieldtype": "Link",
+ "label": "Amended From",
+ "no_copy": 1,
+ "options": "Parser Benchmark Dataset",
+ "print_hide": 1,
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_aoce",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "section_break_txfs",
+ "fieldtype": "Section Break",
+ "label": "Transaction Details"
+ },
+ {
+ "fieldname": "column_break_xusg",
+ "fieldtype": "Column Break"
+ }
+ ],
+ "index_web_pages_for_search": 1,
+ "is_submittable": 1,
+ "links": [
+ {
+ "link_doctype": "Parser Benchmark Log",
+ "link_fieldname": "dataset"
+ }
+ ],
+ "modified": "2026-03-30 15:01:03.977940",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Dataset",
+ "naming_rule": "By \"Naming Series\" field",
+ "owner": "Administrator",
+ "permissions": [
+ {
+ "create": 1,
+ "delete": 1,
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "read": 1,
+ "report": 1,
+ "role": "System Manager",
+ "share": 1,
+ "write": 1
+ }
+ ],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py
new file mode 100644
index 0000000..cc7c55c
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py
@@ -0,0 +1,245 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+import frappe
+from frappe import _
+from frappe.model.document import Document
+
+# Maps dataset checkbox fieldnames → model/processor display names
+AI_MODEL_FIELD_MAP = {
+ "deepseek_chat": "DeepSeek Chat",
+ "deepseek_reasoner": "DeepSeek Reasoner",
+ "openai_gpt_4o": "OpenAI gpt-4o",
+ "openai_gpt_4o_mini": "OpenAI gpt-4o-mini",
+ "openai_gpt_5": "OpenAI gpt-5",
+ "openai_gpt_5_mini": "OpenAI gpt-5-mini",
+ "google_gemini_pro_25": "Google Gemini Pro-2.5",
+ "google_gemini_flash_25": "Google Gemini Flash-2.5",
+}
+
+PDF_PROCESSOR_FIELD_MAP = {
+ "ocrmypdf": "OCRMyPDF",
+ "docling": "Docling",
+}
+
+DOCTYPE = "Parser Benchmark Dataset"
+
+
+class ParserBenchmarkDataset(Document):
+ # begin: auto-generated types
+ # This code is auto-generated. Do not modify anything in this block.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from frappe.types import DF
+
+ from transaction_parser.parser_benchmark.doctype.parser_benchmark_dataset_file.parser_benchmark_dataset_file import (
+ ParserBenchmarkDatasetFile,
+ )
+ from transaction_parser.parser_benchmark.doctype.parser_benchmark_expected_field.parser_benchmark_expected_field import (
+ ParserBenchmarkExpectedField,
+ )
+
+ amended_from: DF.Link | None
+ company: DF.Link | None
+ country: DF.Literal["India", "Other"]
+ deepseek_chat: DF.Check
+ deepseek_reasoner: DF.Check
+ docling: DF.Check
+ enabled: DF.Check
+ expected_fields: DF.Table[ParserBenchmarkExpectedField]
+ files: DF.Table[ParserBenchmarkDatasetFile]
+ google_gemini_flash_25: DF.Check
+ google_gemini_pro_25: DF.Check
+ is_multiple_files: DF.Check
+ naming_series: DF.Literal["PAR-BM-DTS-"]
+ ocrmypdf: DF.Check
+ openai_gpt_4o: DF.Check
+ openai_gpt_4o_mini: DF.Check
+ openai_gpt_5: DF.Check
+ openai_gpt_5_mini: DF.Check
+ page_limit: DF.Int
+ party: DF.DynamicLink | None
+ party_type: DF.Link | None
+ transaction_type: DF.Literal["Sales Order", "Expense"]
+ # end: auto-generated types
+
+ SUPPORTED_FILE_TYPES = ("PDF", "CSV", "XLSX", "XLS")
+
+ def validate(self):
+ self.validate_files()
+ self.validate_selected_models()
+ self.validate_expected_fields()
+
+ def before_update_after_submit(self):
+ self.validate_files()
+
+	def validate_files(self):
+		"""Set file_type for each row and auto-set is_multiple_files."""
+		for row in self.files:
+			if row.file and (not row.file_type or row.has_value_changed("file")):
+				file_doc = frappe.get_last_doc("File", filters={"file_url": row.file})
+				row.file_type = file_doc.file_type
+
+			if row.file_type not in self.SUPPORTED_FILE_TYPES:
+				frappe.throw(
+					_(
+						"File '{0}' has unsupported type '{1}'. Supported types are:\n{2}."
+					).format(
+						row.file,
+						row.file_type,
+						"\n".join(self.SUPPORTED_FILE_TYPES),
+					)
+				)
+
+		self.is_multiple_files = len(self.files) > 1
+
+ def validate_selected_models(self):
+ if not self.get_selected_models():
+ frappe.throw(_("Please select at least one AI Model."))
+
+ def validate_expected_fields(self):
+ if not self.expected_fields:
+ return
+
+ seen_keys = set()
+ for row in self.expected_fields:
+ if row.key in seen_keys:
+ frappe.throw(
+ _("Duplicate key '{0}' in Expected Fields row {1}").format(
+ row.key, row.idx
+ )
+ )
+ seen_keys.add(row.key)
+
+ try:
+ frappe.parse_json(row.expected_json)
+ except Exception:
+ frappe.throw(
+ title=_("Invalid JSON"),
+ msg=_(
+ "Expected JSON in row {0} (key: {1}) must be valid JSON."
+ ).format(row.idx, row.key),
+ )
+
+ def get_selected_models(self) -> list[str]:
+ """Return list of selected AI model names."""
+ return [label for field, label in AI_MODEL_FIELD_MAP.items() if self.get(field)]
+
+ def get_selected_processors(self) -> list[str]:
+ """Return list of selected PDF processor names."""
+ return [
+ label for field, label in PDF_PROCESSOR_FIELD_MAP.items() if self.get(field)
+ ]
+
+ def has_pdf_file(self) -> bool:
+ """Check if any file in the child table is a PDF."""
+ return any(row.file_type == "PDF" for row in self.files)
+
+ def get_file_docs(self) -> list:
+ """Return File documents for each row in the files child table."""
+ file_docs = []
+ for row in self.files:
+ file_doc = frappe.get_last_doc("File", filters={"file_url": row.file})
+ file_docs.append(file_doc)
+ return file_docs
+
+
+@frappe.whitelist()
+def run_benchmark(dataset_name: str):
+ """Create Benchmark Logs for each model x processor combo and enqueue runs."""
+ frappe.has_permission(DOCTYPE, "write", throw=True)
+
+ if frappe.db.get_value(DOCTYPE, dataset_name, "docstatus") != 1:
+ frappe.throw(_("Dataset must be submitted before running benchmarks."))
+
+ log_names = create_and_enqueue_benchmark_logs(dataset_name)
+
+ if not log_names:
+ frappe.throw(
+ _(
+ "No new benchmarks to queue. Please check if the dataset is properly configured"
+ )
+ )
+
+ return log_names
+
+
+def create_and_enqueue_benchmark_logs(dataset_name: str) -> list[str]:
+ """Create one log per model x processor combo and enqueue each for background execution."""
+ dataset: ParserBenchmarkDataset = frappe.get_cached_doc(DOCTYPE, dataset_name)
+ models = dataset.get_selected_models()
+ processors = (
+ (dataset.get_selected_processors() or [None])
+ if dataset.has_pdf_file()
+ else [None]
+ )
+
+ commit_info = get_commit_info()
+ log_names = []
+
+ for ai_model in models:
+ for pdf_processor in processors:
+ log = frappe.new_doc("Parser Benchmark Log")
+ log.update(
+ {
+ "status": "Queued",
+ "dataset": dataset.name,
+ "ai_model": ai_model,
+ "pdf_processor": pdf_processor,
+ "currency": "USD",
+ **commit_info,
+ }
+ )
+ log.insert(ignore_permissions=True)
+ log_names.append(log.name)
+
+ # commit before enqueuing so background jobs can read the inserted logs
+ frappe.db.commit() # nosemgrep
+
+ for log_name in log_names:
+ try:
+ frappe.enqueue(
+ _run_benchmark,
+ log_name=log_name,
+ queue="long",
+ )
+ except Exception:
+ frappe.db.set_value("Parser Benchmark Log", log_name, "status", "Failed")
+ frappe.db.commit() # nosemgrep -- persist Failed status when enqueue fails
+
+ return log_names
+
+
+def _run_benchmark(log_name: str):
+ from transaction_parser.parser_benchmark.runner import BenchmarkRunner
+
+ BenchmarkRunner(log_name).run()
+
+
+def get_commit_info() -> dict:
+ """Return the current git commit hash and message for the transaction_parser app."""
+ import subprocess
+
+ app_path = frappe.get_app_path("transaction_parser")
+
+ try:
+ result = subprocess.run(
+ ["git", "log", "-1", "--format=%H%n%s"],
+ cwd=app_path,
+ capture_output=True,
+ text=True,
+ timeout=5,
+ )
+
+ if result.returncode == 0:
+ lines = result.stdout.strip().split("\n", 1)
+ return {
+ "commit_hash": lines[0] if lines else "",
+ "commit_message": lines[1] if len(lines) > 1 else "",
+ }
+ except Exception:
+ pass
+
+ return {}
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/test_parser_benchmark_dataset.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/test_parser_benchmark_dataset.py
new file mode 100644
index 0000000..1ae2c88
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/test_parser_benchmark_dataset.py
@@ -0,0 +1,9 @@
+# Copyright (c) 2026, Resilient Tech and Contributors
+# See license.txt
+
+# import frappe
+from frappe.tests.utils import FrappeTestCase
+
+
+class TestParserBenchmarkDataset(FrappeTestCase):
+ pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/__init__.py
new file mode 100644
index 0000000..c4fea77
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.json
new file mode 100644
index 0000000..5f54a45
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.json
@@ -0,0 +1,43 @@
+{
+ "actions": [],
+ "creation": "2026-03-30 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "file",
+ "column_break_yahx",
+ "file_type"
+ ],
+ "fields": [
+ {
+ "fieldname": "file",
+ "fieldtype": "Attach",
+ "in_list_view": 1,
+ "label": "File",
+ "reqd": 1
+ },
+ {
+ "fieldname": "file_type",
+ "fieldtype": "Data",
+ "in_list_view": 1,
+ "label": "File Type",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_yahx",
+ "fieldtype": "Column Break"
+ }
+ ],
+ "istable": 1,
+ "links": [],
+ "modified": "2026-03-30 15:01:27.752102",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Dataset File",
+ "owner": "Administrator",
+ "permissions": [],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.py
new file mode 100644
index 0000000..78b5e42
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from frappe.model.document import Document
+
+
+class ParserBenchmarkDatasetFile(Document):
+ # begin: auto-generated types
+ # This code is auto-generated. Do not modify anything in this block.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from frappe.types import DF
+
+ file: DF.Attach
+ file_type: DF.Data | None
+ parent: DF.Data
+ parentfield: DF.Data
+ parenttype: DF.Data
+ # end: auto-generated types
+
+ pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/parser_benchmark_expected_field.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/parser_benchmark_expected_field.json
new file mode 100644
index 0000000..1d69fdf
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/parser_benchmark_expected_field.json
@@ -0,0 +1,41 @@
+{
+ "actions": [],
+ "creation": "2026-03-27 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "key",
+ "expected_json"
+ ],
+ "fields": [
+ {
+ "fieldname": "key",
+ "fieldtype": "Select",
+ "in_list_view": 1,
+ "label": "Key",
+ "options": "document_number\ndocument_date\ncurrency\nitem_list\ntotals\npayment_terms\nlocal_terms\ndelivery_date\nbuyer\nvendor\ncompany\nsupplier",
+ "reqd": 1
+ },
+ {
+ "allow_on_submit": 1,
+ "fieldname": "expected_json",
+ "fieldtype": "Code",
+ "in_list_view": 1,
+ "label": "Expected JSON",
+ "options": "JSON",
+ "reqd": 1
+ }
+ ],
+ "istable": 1,
+ "links": [],
+ "modified": "2026-03-27 10:56:30.834125",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Expected Field",
+ "owner": "Administrator",
+ "permissions": [],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/parser_benchmark_expected_field.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/parser_benchmark_expected_field.py
new file mode 100644
index 0000000..670f9da
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_expected_field/parser_benchmark_expected_field.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from frappe.model.document import Document
+
+
+class ParserBenchmarkExpectedField(Document):
+ # begin: auto-generated types
+ # This code is auto-generated. Do not modify anything in this block.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from frappe.types import DF
+
+ expected_json: DF.Code
+ key: DF.Literal[
+ "document_number",
+ "document_date",
+ "currency",
+ "item_list",
+ "totals",
+ "payment_terms",
+ "local_terms",
+ "delivery_date",
+ "buyer",
+ "vendor",
+ "company",
+ "supplier",
+ ]
+ parent: DF.Data
+ parentfield: DF.Data
+ parenttype: DF.Data
+ # end: auto-generated types
+
+ pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/parser_benchmark_key_weight.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/parser_benchmark_key_weight.json
new file mode 100644
index 0000000..4c0c322
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/parser_benchmark_key_weight.json
@@ -0,0 +1,46 @@
+{
+ "actions": [],
+ "creation": "2026-03-27 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "key",
+ "column_break_qaca",
+ "weight"
+ ],
+ "fields": [
+ {
+ "fieldname": "key",
+ "fieldtype": "Select",
+ "in_list_view": 1,
+ "label": "Key",
+ "options": "document_number\ndocument_date\ncurrency\nitem_list\ntotals\npayment_terms\nlocal_terms\ndelivery_date\nbuyer\nvendor\ncompany\nsupplier",
+ "reqd": 1
+ },
+ {
+ "default": "1",
+ "fieldname": "weight",
+ "fieldtype": "Float",
+ "in_list_view": 1,
+ "label": "Weight",
+ "non_negative": 1,
+ "reqd": 1
+ },
+ {
+ "fieldname": "column_break_qaca",
+ "fieldtype": "Column Break"
+ }
+ ],
+ "istable": 1,
+ "links": [],
+ "modified": "2026-03-27 11:00:43.378124",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Key Weight",
+ "owner": "Administrator",
+ "permissions": [],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/parser_benchmark_key_weight.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/parser_benchmark_key_weight.py
new file mode 100644
index 0000000..117ba33
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_key_weight/parser_benchmark_key_weight.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from frappe.model.document import Document
+
+
+class ParserBenchmarkKeyWeight(Document):
+ # begin: auto-generated types
+ # This code is auto-generated. Do not modify anything in this block.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from frappe.types import DF
+
+ key: DF.Literal[
+ "document_number",
+ "document_date",
+ "currency",
+ "item_list",
+ "totals",
+ "payment_terms",
+ "local_terms",
+ "delivery_date",
+ "buyer",
+ "vendor",
+ "company",
+ "supplier",
+ ]
+ parent: DF.Data
+ parentfield: DF.Data
+ parenttype: DF.Data
+ weight: DF.Float
+ # end: auto-generated types
+
+ pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.js b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.js
new file mode 100644
index 0000000..e21eaee
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.js
@@ -0,0 +1,8 @@
+// Copyright (c) 2026, Resilient Tech and contributors
+// For license information, please see license.txt
+
+// frappe.ui.form.on("Parser Benchmark Log", {
+// refresh(frm) {
+
+// },
+// });
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json
new file mode 100644
index 0000000..0bbe3c6
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json
@@ -0,0 +1,451 @@
+{
+ "actions": [],
+ "autoname": "naming_series:",
+ "creation": "2026-03-16 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "config_section",
+ "dataset",
+ "transaction_type",
+ "country",
+ "column_break_summary",
+ "company",
+ "party_type",
+ "party",
+ "section_break_peyo",
+ "total_time",
+ "column_break_fmal",
+ "status",
+ "version_section",
+ "commit_hash",
+ "column_break_version",
+ "commit_message",
+ "section_break_nmcv",
+ "naming_series",
+ "column_break_ubhs",
+ "file_parsing_tab",
+ "pdf_processor",
+ "column_break_file_metrics",
+ "page_limit",
+ "section_break_umzr",
+ "file_parse_time",
+ "column_break_kgph",
+ "file_parse_memory",
+ "section_break_file_content",
+ "file_content",
+ "ai_response_tab",
+ "other_details_section",
+ "ai_model",
+ "column_break_fsxb",
+ "ai_parse_time",
+ "api_cost_section",
+ "input_token_cost",
+ "currency",
+ "column_break_ncdl",
+ "output_token_cost",
+ "section_break_yqhs",
+ "prompt_tokens",
+ "completion_tokens",
+ "total_tokens",
+ "column_break_ai",
+ "input_cost",
+ "output_cost",
+ "total_cost",
+ "section_break_ai_content",
+ "ai_response",
+ "accuracy_tab",
+ "accuracy_section",
+ "accuracy_score",
+ "column_break_accuracy",
+ "score_details_section",
+ "score_details",
+ "error_tab",
+ "section_break_error",
+ "error"
+ ],
+ "fields": [
+ {
+ "default": "PAR-BM-LOG-",
+ "fieldname": "naming_series",
+ "fieldtype": "Select",
+ "hidden": 1,
+ "label": "Naming Series",
+ "options": "PAR-BM-LOG-"
+ },
+ {
+ "fieldname": "dataset",
+ "fieldtype": "Link",
+ "in_list_view": 1,
+ "label": "Dataset",
+ "options": "Parser Benchmark Dataset",
+ "read_only": 1,
+ "reqd": 1,
+ "search_index": 1
+ },
+ {
+ "default": "Queued",
+ "fieldname": "status",
+ "fieldtype": "Select",
+ "in_list_view": 1,
+ "in_standard_filter": 1,
+ "label": "Status",
+ "options": "Queued\nRunning\nCompleted\nFailed",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_summary",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "ai_model",
+ "fieldtype": "Select",
+ "in_list_view": 1,
+ "label": "AI Model",
+ "options": "DeepSeek Chat\nDeepSeek Reasoner\nOpenAI gpt-4o\nOpenAI gpt-4o-mini\nOpenAI gpt-5\nOpenAI gpt-5-mini\nGoogle Gemini Pro-2.5\nGoogle Gemini Flash-2.5",
+ "read_only": 1
+ },
+ {
+ "fieldname": "pdf_processor",
+ "fieldtype": "Select",
+ "label": "PDF Processor",
+ "options": "\nOCRMyPDF\nDocling",
+ "read_only": 1
+ },
+ {
+ "fieldname": "total_time",
+ "fieldtype": "Float",
+ "in_list_view": 1,
+ "label": "Total Time (s)",
+ "read_only": 1
+ },
+
+ {
+ "fieldname": "file_parsing_tab",
+ "fieldtype": "Tab Break",
+ "label": "File Parsing"
+ },
+ {
+ "fieldname": "file_parse_time",
+ "fieldtype": "Float",
+ "label": "Parse Time (s)",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_file_metrics",
+ "fieldtype": "Column Break"
+ },
+ {
+ "description": "Peak memory allocated during file parsing, measured using tracemalloc",
+ "fieldname": "file_parse_memory",
+ "fieldtype": "Float",
+ "label": "Peak Memory (MB)",
+ "read_only": 1
+ },
+ {
+ "fieldname": "section_break_file_content",
+ "fieldtype": "Section Break",
+ "label": "Extracted Content"
+ },
+ {
+ "fieldname": "file_content",
+ "fieldtype": "Code",
+ "label": "File Content",
+ "options": "Markdown",
+ "read_only": 1
+ },
+ {
+ "fieldname": "ai_response_tab",
+ "fieldtype": "Tab Break",
+ "label": "AI Response"
+ },
+ {
+ "fieldname": "prompt_tokens",
+ "fieldtype": "Int",
+ "in_list_view": 1,
+ "label": "Prompt Tokens",
+ "read_only": 1
+ },
+ {
+ "fieldname": "completion_tokens",
+ "fieldtype": "Int",
+ "label": "Completion Tokens",
+ "read_only": 1
+ },
+ {
+ "fieldname": "total_tokens",
+ "fieldtype": "Int",
+ "label": "Total Tokens",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_ai",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "ai_parse_time",
+ "fieldtype": "Float",
+ "label": "AI Parse Time (s)",
+ "read_only": 1
+ },
+ {
+ "fieldname": "input_cost",
+ "fieldtype": "Currency",
+ "label": "Input Cost",
+ "options": "currency",
+ "read_only": 1
+ },
+ {
+ "fieldname": "output_cost",
+ "fieldtype": "Currency",
+ "label": "Output Cost",
+ "options": "currency",
+ "read_only": 1
+ },
+ {
+ "fieldname": "total_cost",
+ "fieldtype": "Currency",
+ "in_list_view": 1,
+ "label": "Total Cost",
+ "options": "currency",
+ "read_only": 1
+ },
+ {
+ "fieldname": "currency",
+ "fieldtype": "Link",
+ "hidden": 1,
+ "label": "Currency",
+ "options": "Currency",
+ "read_only": 1
+ },
+ {
+ "fieldname": "section_break_ai_content",
+ "fieldtype": "Section Break"
+ },
+ {
+ "description": "Only document content",
+ "fieldname": "ai_response",
+ "fieldtype": "Code",
+ "label": "AI Response",
+ "read_only": 1
+ },
+ {
+ "depends_on": "eval: doc.status === 'Failed'",
+ "fieldname": "section_break_error",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "error",
+ "fieldtype": "Code",
+ "label": "Error Traceback",
+ "read_only": 1
+ },
+ {
+ "collapsible": 1,
+ "fieldname": "version_section",
+ "fieldtype": "Section Break",
+ "label": "Version"
+ },
+ {
+ "fieldname": "commit_hash",
+ "fieldtype": "Data",
+ "label": "Commit Hash",
+ "length": 40,
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_version",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "commit_message",
+ "fieldtype": "Small Text",
+ "label": "Commit Message",
+ "read_only": 1
+ },
+ {
+ "collapsible": 1,
+ "fieldname": "section_break_nmcv",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "column_break_ubhs",
+ "fieldtype": "Column Break"
+ },
+ {
+ "description": "Cost per 1M input tokens at the time of this run",
+ "fieldname": "input_token_cost",
+ "fieldtype": "Currency",
+ "label": "Input Token Cost",
+ "non_negative": 1,
+ "options": "currency",
+ "read_only": 1
+ },
+ {
+ "description": "Cost per 1M output tokens at the time of this run",
+ "fieldname": "output_token_cost",
+ "fieldtype": "Currency",
+ "label": "Output Token Cost",
+ "non_negative": 1,
+ "options": "currency",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_ncdl",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "section_break_yqhs",
+ "fieldtype": "Section Break",
+ "label": "Token Details"
+ },
+ {
+ "fieldname": "accuracy_tab",
+ "fieldtype": "Tab Break",
+ "label": "Accuracy"
+ },
+ {
+ "fieldname": "accuracy_section",
+ "fieldtype": "Section Break"
+ },
+ {
+ "description": "Percentage of fields matching the expected result (0-100)",
+ "fieldname": "accuracy_score",
+ "fieldtype": "Percent",
+ "in_list_view": 1,
+ "label": "Accuracy Score (%)",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_accuracy",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "score_details_section",
+ "fieldtype": "Section Break",
+ "label": "Score Details"
+ },
+ {
+ "fieldname": "score_details",
+ "fieldtype": "Table",
+ "label": "Score Details",
+ "options": "Parser Benchmark Score Detail",
+ "read_only": 1
+ },
+ {
+ "fieldname": "error_tab",
+ "fieldtype": "Tab Break",
+ "label": "Error"
+ },
+ {
+ "fieldname": "api_cost_section",
+ "fieldtype": "Section Break",
+ "label": "API Cost"
+ },
+ {
+ "fieldname": "other_details_section",
+ "fieldtype": "Section Break",
+ "label": "Details"
+ },
+ {
+ "fieldname": "column_break_fsxb",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "transaction_type",
+ "fieldtype": "Select",
+ "is_virtual": 1,
+ "label": "Transaction Type",
+ "options": "Sales Order\nExpense",
+ "read_only": 1
+ },
+ {
+ "fieldname": "country",
+ "fieldtype": "Select",
+ "is_virtual": 1,
+ "label": "Country",
+ "options": "India\nOther",
+ "read_only": 1
+ },
+ {
+ "fieldname": "company",
+ "fieldtype": "Link",
+ "is_virtual": 1,
+ "label": "Company",
+ "options": "Company",
+ "read_only": 1
+ },
+ {
+ "fieldname": "party_type",
+ "fieldtype": "Link",
+ "hidden": 1,
+ "is_virtual": 1,
+ "label": "Party Type",
+ "options": "DocType",
+ "read_only": 1
+ },
+ {
+ "fieldname": "party",
+ "fieldtype": "Dynamic Link",
+ "is_virtual": 1,
+ "label": "Party",
+ "options": "party_type",
+ "read_only": 1
+ },
+ {
+ "fieldname": "section_break_peyo",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "column_break_fmal",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "config_section",
+ "fieldtype": "Section Break",
+ "label": "Config"
+ },
+ {
+ "fieldname": "page_limit",
+ "fieldtype": "Int",
+ "is_virtual": 1,
+ "label": "Page Limit",
+ "read_only": 1
+ },
+ {
+ "fieldname": "section_break_umzr",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "column_break_kgph",
+ "fieldtype": "Column Break"
+ }
+ ],
+ "in_create": 1,
+ "index_web_pages_for_search": 1,
+ "links": [],
+ "modified": "2026-03-27 10:32:31.037034",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Log",
+ "naming_rule": "By \"Naming Series\" field",
+ "owner": "Administrator",
+ "permissions": [
+ {
+ "create": 1,
+ "delete": 1,
+ "email": 1,
+ "export": 1,
+ "print": 1,
+ "read": 1,
+ "report": 1,
+ "role": "System Manager",
+ "share": 1,
+ "write": 1
+ }
+ ],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py
new file mode 100644
index 0000000..5cd5b7f
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+import frappe
+from frappe.model.document import Document
+
+
class ParserBenchmarkLog(Document):
    """Log of a single benchmark run against one Parser Benchmark Dataset.

    Stores timing, memory, token usage, API cost and accuracy results for
    the run. Several fields (transaction_type, country, company, party_type,
    party, page_limit) are declared as virtual in parser_benchmark_log.json
    and are resolved from the linked dataset via the properties below.
    """

    # begin: auto-generated types
    # This code is auto-generated. Do not modify anything in this block.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from frappe.types import DF

        from transaction_parser.parser_benchmark.doctype.parser_benchmark_score_detail.parser_benchmark_score_detail import (
            ParserBenchmarkScoreDetail,
        )

        accuracy_score: DF.Percent
        ai_model: DF.Literal[
            "DeepSeek Chat",
            "DeepSeek Reasoner",
            "OpenAI gpt-4o",
            "OpenAI gpt-4o-mini",
            "OpenAI gpt-5",
            "OpenAI gpt-5-mini",
            "Google Gemini Pro-2.5",
            "Google Gemini Flash-2.5",
        ]
        ai_parse_time: DF.Float
        ai_response: DF.Code | None
        commit_hash: DF.Data | None
        commit_message: DF.SmallText | None
        company: DF.Link | None
        completion_tokens: DF.Int
        country: DF.Literal["India", "Other"]
        currency: DF.Link | None
        dataset: DF.Link
        error: DF.Code | None
        file_content: DF.Code | None
        file_parse_memory: DF.Float
        file_parse_time: DF.Float
        input_cost: DF.Currency
        input_token_cost: DF.Currency
        naming_series: DF.Literal["PAR-BM-LOG-"]
        output_cost: DF.Currency
        output_token_cost: DF.Currency
        party: DF.DynamicLink | None
        party_type: DF.Link | None
        pdf_processor: DF.Literal["", "OCRMyPDF", "Docling"]
        prompt_tokens: DF.Int
        score_details: DF.Table[ParserBenchmarkScoreDetail]
        status: DF.Literal["Queued", "Running", "Completed", "Failed"]
        total_cost: DF.Currency
        total_time: DF.Float
        total_tokens: DF.Int
        transaction_type: DF.Literal["Sales Order", "Expense"]
    # end: auto-generated types

    def _get_dataset(self):
        """Fetch and memoize the linked dataset document on this instance."""
        if not self.dataset:
            return None

        # Cache on the instance so repeated property reads during a single
        # request do not re-fetch the document.
        if not hasattr(self, "_dataset_doc"):
            self._dataset_doc = frappe.get_cached_doc(
                "Parser Benchmark Dataset", self.dataset
            )

        return self._dataset_doc

    def get_from_dataset(self, fieldname: str):
        """Return *fieldname* from the linked dataset, or None if no dataset."""
        dataset = self._get_dataset()
        return dataset.get(fieldname) if dataset else None

    # Properties backing the virtual fields declared in the doctype JSON;
    # their values live on the linked Parser Benchmark Dataset.

    @property
    def transaction_type(self):
        return self.get_from_dataset("transaction_type")

    @property
    def country(self):
        return self.get_from_dataset("country")

    @property
    def company(self):
        return self.get_from_dataset("company")

    @property
    def party_type(self):
        return self.get_from_dataset("party_type")

    @property
    def party(self):
        return self.get_from_dataset("party")

    @property
    def page_limit(self):
        # Falls back to 0 when the dataset leaves page_limit unset.
        return self.get_from_dataset("page_limit") or 0
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/test_parser_benchmark_log.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/test_parser_benchmark_log.py
new file mode 100644
index 0000000..a04dc86
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/test_parser_benchmark_log.py
@@ -0,0 +1,9 @@
+# Copyright (c) 2026, Resilient Tech and Contributors
+# See license.txt
+
+# import frappe
+from frappe.tests.utils import FrappeTestCase
+
+
class TestParserBenchmarkLog(FrappeTestCase):
    """Scaffolded test case for Parser Benchmark Log; no unit tests yet."""

    pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/parser_benchmark_score_detail.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/parser_benchmark_score_detail.json
new file mode 100644
index 0000000..b8fd5d6
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/parser_benchmark_score_detail.json
@@ -0,0 +1,76 @@
+{
+ "actions": [],
+ "creation": "2026-03-27 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "key",
+ "total",
+ "column_break_jhbx",
+ "matched",
+ "accuracy",
+ "section_break_zimm",
+ "mismatches"
+ ],
+ "fields": [
+ {
+ "fieldname": "key",
+ "fieldtype": "Data",
+ "in_list_view": 1,
+ "label": "Key",
+ "read_only": 1,
+ "reqd": 1
+ },
+ {
+ "description": "Number of leaf fields whose value matched the expected result",
+ "fieldname": "matched",
+ "fieldtype": "Int",
+ "in_list_view": 1,
+ "label": "Matched",
+ "read_only": 1
+ },
+ {
+ "description": "Total number of leaf fields compared for this key",
+ "fieldname": "total",
+ "fieldtype": "Int",
+ "in_list_view": 1,
+ "label": "Total",
+ "read_only": 1
+ },
+ {
+ "fieldname": "accuracy",
+ "fieldtype": "Percent",
+ "in_list_view": 1,
+ "label": "Accuracy (%)",
+ "read_only": 1
+ },
+ {
+ "description": "List of fields that did not match, with expected and actual values",
+ "fieldname": "mismatches",
+ "fieldtype": "Code",
+ "label": "Mismatches",
+ "options": "JSON",
+ "read_only": 1
+ },
+ {
+ "fieldname": "column_break_jhbx",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "section_break_zimm",
+ "fieldtype": "Section Break"
+ }
+ ],
+ "istable": 1,
+ "links": [],
+ "modified": "2026-03-27 12:30:17.720466",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Score Detail",
+ "owner": "Administrator",
+ "permissions": [],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/parser_benchmark_score_detail.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/parser_benchmark_score_detail.py
new file mode 100644
index 0000000..d70cb77
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_score_detail/parser_benchmark_score_detail.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from frappe.model.document import Document
+
+
class ParserBenchmarkScoreDetail(Document):
    """Child row of Parser Benchmark Log.

    Per-key accuracy breakdown of a run: how many leaf fields were compared
    (total), how many matched, the resulting accuracy percentage, and a JSON
    blob of the mismatched fields.
    """

    # begin: auto-generated types
    # This code is auto-generated. Do not modify anything in this block.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from frappe.types import DF

        accuracy: DF.Percent
        key: DF.Data
        matched: DF.Int
        mismatches: DF.Code | None
        parent: DF.Data
        parentfield: DF.Data
        parenttype: DF.Data
        total: DF.Int
    # end: auto-generated types

    pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.js b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.js
new file mode 100644
index 0000000..bb53144
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.js
@@ -0,0 +1,8 @@
+// Copyright (c) 2026, Resilient Tech and contributors
+// For license information, please see license.txt
+
+// frappe.ui.form.on("Parser Benchmark Settings", {
+// refresh(frm) {
+
+// },
+// });
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.json
new file mode 100644
index 0000000..6eda028
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.json
@@ -0,0 +1,135 @@
+{
+ "actions": [],
+ "creation": "2026-03-16 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "enabled",
+ "column_break_gpyw",
+ "schedule_section",
+ "monday",
+ "thursday",
+ "sunday",
+ "column_break_schedule",
+ "tuesday",
+ "friday",
+ "column_break_kaxp",
+ "wednesday",
+ "saturday",
+ "section_break_kvgf",
+ "token_costs",
+ "key_weights_section",
+ "key_weights"
+ ],
+ "fields": [
+ {
+ "fieldname": "token_costs",
+ "fieldtype": "Table",
+ "label": "Token Costs",
+ "options": "Parser Benchmark Token Cost"
+ },
+ {
+ "depends_on": "eval: doc.enabled",
+ "fieldname": "schedule_section",
+ "fieldtype": "Section Break",
+ "label": "Schedule"
+ },
+ {
+ "default": "0",
+ "fieldname": "monday",
+ "fieldtype": "Check",
+ "label": "Monday"
+ },
+ {
+ "default": "0",
+ "fieldname": "tuesday",
+ "fieldtype": "Check",
+ "label": "Tuesday"
+ },
+ {
+ "default": "0",
+ "fieldname": "wednesday",
+ "fieldtype": "Check",
+ "label": "Wednesday"
+ },
+ {
+ "default": "0",
+ "fieldname": "thursday",
+ "fieldtype": "Check",
+ "label": "Thursday"
+ },
+ {
+ "fieldname": "column_break_schedule",
+ "fieldtype": "Column Break"
+ },
+ {
+ "default": "0",
+ "fieldname": "friday",
+ "fieldtype": "Check",
+ "label": "Friday"
+ },
+ {
+ "default": "0",
+ "fieldname": "saturday",
+ "fieldtype": "Check",
+ "label": "Saturday"
+ },
+ {
+ "default": "0",
+ "fieldname": "sunday",
+ "fieldtype": "Check",
+ "label": "Sunday"
+ },
+ {
+ "fieldname": "column_break_kaxp",
+ "fieldtype": "Column Break"
+ },
+ {
+ "fieldname": "section_break_kvgf",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "column_break_gpyw",
+ "fieldtype": "Column Break"
+ },
+ {
+ "default": "0",
+ "fieldname": "enabled",
+ "fieldtype": "Check",
+ "label": "Scheduling Enabled"
+ },
+ {
+ "fieldname": "key_weights_section",
+ "fieldtype": "Section Break"
+ },
+ {
+ "fieldname": "key_weights",
+ "fieldtype": "Table",
+ "label": "Key Weights",
+ "options": "Parser Benchmark Key Weight"
+ }
+ ],
+ "issingle": 1,
+ "links": [],
+ "modified": "2026-03-27 12:19:19.982234",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Settings",
+ "owner": "Administrator",
+ "permissions": [
+ {
+ "create": 1,
+ "delete": 1,
+ "email": 1,
+ "print": 1,
+ "read": 1,
+ "role": "System Manager",
+ "share": 1,
+ "write": 1
+ }
+ ],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.py
new file mode 100644
index 0000000..103e2e8
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/parser_benchmark_settings.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+import frappe
+from frappe import _
+from frappe.model.document import Document
+from frappe.utils import getdate
+
# Checkbox fieldnames on Parser Benchmark Settings, ordered to match
# datetime.date.weekday() (0 = Monday ... 6 = Sunday) — is_scheduled_today()
# indexes into this list with today's weekday number.
WEEKDAY_FIELDS = [
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
    "sunday",
]
+
+
class ParserBenchmarkSettings(Document):
    """Single doctype holding benchmark configuration.

    Stores the weekly run schedule (one checkbox per weekday), per-model
    token costs, and the per-key weights used for accuracy scoring.
    """

    # begin: auto-generated types
    # This code is auto-generated. Do not modify anything in this block.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from frappe.types import DF

        from transaction_parser.parser_benchmark.doctype.parser_benchmark_key_weight.parser_benchmark_key_weight import (
            ParserBenchmarkKeyWeight,
        )
        from transaction_parser.parser_benchmark.doctype.parser_benchmark_token_cost.parser_benchmark_token_cost import (
            ParserBenchmarkTokenCost,
        )

        enabled: DF.Check
        friday: DF.Check
        key_weights: DF.Table[ParserBenchmarkKeyWeight]
        monday: DF.Check
        saturday: DF.Check
        sunday: DF.Check
        thursday: DF.Check
        token_costs: DF.Table[ParserBenchmarkTokenCost]
        tuesday: DF.Check
        wednesday: DF.Check
    # end: auto-generated types

    def validate(self):
        self.validate_weights()

    def validate_weights(self):
        """Ensure each key appears at most once in the Key Weights table.

        Raises a ValidationError naming the duplicate key and row index.
        """
        if not self.key_weights:
            return

        seen_keys = set()
        for row in self.key_weights:
            if row.key in seen_keys:
                frappe.throw(
                    _("Duplicate key '{0}' in Key Weights row {1}").format(
                        row.key, row.idx
                    )
                )
            seen_keys.add(row.key)

    # Backward-compatible alias for the previously misspelled method name.
    validate_wights = validate_weights

    def is_scheduled_today(self) -> bool:
        """Check if today's weekday is enabled in the schedule."""
        today_index = getdate().weekday()  # 0 = Monday
        return bool(self.get(WEEKDAY_FIELDS[today_index]))
+
+
def run_scheduled_benchmarks():
    """Daily cron entry point: enqueue benchmark runs for enabled datasets.

    Exits early unless scheduling is enabled and today's weekday is checked
    in Parser Benchmark Settings. A failure for one dataset is logged and
    does not stop the remaining datasets.
    """
    # Imported lazily to avoid a circular import at module load time.
    from transaction_parser.parser_benchmark.doctype.parser_benchmark_dataset.parser_benchmark_dataset import (
        create_and_enqueue_benchmark_logs,
    )

    settings: ParserBenchmarkSettings = frappe.get_cached_doc(
        "Parser Benchmark Settings"
    )

    if not (settings.enabled and settings.is_scheduled_today()):
        return

    enabled_datasets = frappe.get_all(
        "Parser Benchmark Dataset",
        filters={"enabled": 1, "docstatus": 1},
        pluck="name",
    )

    for dataset_name in enabled_datasets:
        try:
            create_and_enqueue_benchmark_logs(dataset_name)
        except Exception:
            # Best-effort: record the traceback and continue with the rest.
            frappe.log_error(
                title=f"Failed to enqueue benchmark for dataset {dataset_name}",
                message=frappe.get_traceback(),
            )
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/test_parser_benchmark_settings.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/test_parser_benchmark_settings.py
new file mode 100644
index 0000000..d9bd1ae
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_settings/test_parser_benchmark_settings.py
@@ -0,0 +1,9 @@
+# Copyright (c) 2026, Resilient Tech and Contributors
+# See license.txt
+
+# import frappe
+from frappe.tests.utils import FrappeTestCase
+
+
class TestParserBenchmarkSettings(FrappeTestCase):
    """Scaffolded test case for Parser Benchmark Settings; no unit tests yet."""

    pass
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/parser_benchmark_token_cost.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/parser_benchmark_token_cost.json
new file mode 100644
index 0000000..ce0da61
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/parser_benchmark_token_cost.json
@@ -0,0 +1,69 @@
+{
+ "actions": [],
+ "creation": "2026-03-16 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+ "ai_model",
+ "input_cost_per_million",
+ "column_break_jvcd",
+ "currency",
+ "output_cost_per_million"
+ ],
+ "fields": [
+ {
+ "fieldname": "ai_model",
+ "fieldtype": "Select",
+ "in_list_view": 1,
+ "label": "AI Model",
+ "options": "DeepSeek Chat\nDeepSeek Reasoner\nOpenAI gpt-4o\nOpenAI gpt-4o-mini\nOpenAI gpt-5\nOpenAI gpt-5-mini\nGoogle Gemini Pro-2.5\nGoogle Gemini Flash-2.5",
+ "reqd": 1
+ },
+ {
+ "default": "USD",
+ "description": "Recommend using USD",
+ "fieldname": "currency",
+ "fieldtype": "Link",
+ "in_list_view": 1,
+ "label": "Currency",
+ "options": "Currency",
+ "reqd": 1
+ },
+ {
+ "description": "Per 1M Tokens",
+ "fieldname": "input_cost_per_million",
+ "fieldtype": "Currency",
+ "in_list_view": 1,
+ "label": "Input Cost",
+ "non_negative": 1,
+ "options": "currency",
+ "reqd": 1
+ },
+ {
+ "description": "Per 1M Tokens",
+ "fieldname": "output_cost_per_million",
+ "fieldtype": "Currency",
+ "in_list_view": 1,
+ "label": "Output Cost",
+ "non_negative": 1,
+ "options": "currency",
+ "reqd": 1
+ },
+ {
+ "fieldname": "column_break_jvcd",
+ "fieldtype": "Column Break"
+ }
+ ],
+ "istable": 1,
+ "links": [],
+ "modified": "2026-03-27 10:36:00.699499",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Token Cost",
+ "owner": "Administrator",
+ "permissions": [],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/parser_benchmark_token_cost.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/parser_benchmark_token_cost.py
new file mode 100644
index 0000000..68ac63b
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_token_cost/parser_benchmark_token_cost.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from frappe.model.document import Document
+
+
class ParserBenchmarkTokenCost(Document):
    """Child row of Parser Benchmark Settings.

    Per-AI-model pricing: input and output cost per one million tokens, in
    the selected currency. Used to compute API cost for each benchmark run.
    """

    # begin: auto-generated types
    # This code is auto-generated. Do not modify anything in this block.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from frappe.types import DF

        ai_model: DF.Literal[
            "DeepSeek Chat",
            "DeepSeek Reasoner",
            "OpenAI gpt-4o",
            "OpenAI gpt-4o-mini",
            "OpenAI gpt-5",
            "OpenAI gpt-5-mini",
            "Google Gemini Pro-2.5",
            "Google Gemini Flash-2.5",
        ]
        currency: DF.Link
        input_cost_per_million: DF.Currency
        output_cost_per_million: DF.Currency
        parent: DF.Data
        parentfield: DF.Data
        parenttype: DF.Data
    # end: auto-generated types

    pass
diff --git a/transaction_parser/parser_benchmark/report/__init__.py b/transaction_parser/parser_benchmark/report/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/__init__.py b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js
new file mode 100644
index 0000000..5032a3c
--- /dev/null
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js
@@ -0,0 +1,114 @@
+// Copyright (c) 2026, Resilient Tech and contributors
+// For license information, please see license.txt
+
// Options for the "AI Model" filter; must stay in sync with the Select
// options on the Parser Benchmark Log doctype.
const AI_MODELS = [
    "DeepSeek Chat",
    "DeepSeek Reasoner",
    "OpenAI gpt-4o",
    "OpenAI gpt-4o-mini",
    "OpenAI gpt-5",
    "OpenAI gpt-5-mini",
    "Google Gemini Pro-2.5",
    "Google Gemini Flash-2.5",
];

// Options for the "PDF Processor" filter.
const PDF_PROCESSORS = ["OCRMyPDF", "Docling"];

// Maps the selected transaction type to the doctype used by the
// dynamic-link "party" filter.
const PARTY_TYPE_MAP = {
    "Sales Order": "Customer",
    Expense: "Supplier",
};
+
// Build MultiSelectList options from `items`, keeping only entries that
// contain `txt` (case-insensitive). Empty or missing search text keeps all.
function make_options(items, txt) {
    const needle = (txt || "").toLowerCase();
    const options = [];
    for (const item of items) {
        if (!needle || item.toLowerCase().includes(needle)) {
            options.push({ value: item, description: "" });
        }
    }
    return options;
}
+
frappe.query_reports["Transaction Parser Accuracy Analysis"] = {
    tree: true,
    initial_depth: 1,

    onload(report) {
        // Sync the hidden party_type filter with the default transaction type.
        set_party_type(report);
    },

    formatter(value, row, column, data, default_formatter) {
        // Render the comma-separated log names as a single link that opens
        // the Parser Benchmark Log list filtered to those documents.
        if (column.fieldname === "log_names" && value) {
            const names = value.split(",").filter(Boolean);
            if (!names.length) return value;

            const filters = frappe.utils.get_url_from_dict({
                name: JSON.stringify(["in", names]),
            });
            const url = `/app/parser-benchmark-log?${filters}`;
            // Fix: `url` was previously computed but never used, so the cell
            // rendered as plain text instead of a clickable link.
            return `<a href="${url}">See Logs (${names.length})</a>`;
        }
        return default_formatter(value, row, column, data);
    },

    filters: [
        {
            fieldname: "company",
            label: __("Company"),
            fieldtype: "Link",
            options: "Company",
            reqd: 1,
            default: frappe.defaults.get_user_default("Company"),
        },
        {
            fieldname: "transaction_type",
            label: __("Transaction Type"),
            fieldtype: "Select",
            options: "\nSales Order\nExpense",
            reqd: 1,
            default: "Sales Order",
            on_change() {
                // Keep party_type consistent whenever the type changes.
                set_party_type(frappe.query_report);
            },
        },
        {
            // Hidden; driven by transaction_type via set_party_type().
            fieldname: "party_type",
            label: __("Party Type"),
            fieldtype: "Link",
            options: "DocType",
            hidden: 1,
        },
        {
            fieldname: "party",
            label: __("Party"),
            fieldtype: "Dynamic Link",
            options: "party_type",
        },
        {
            fieldname: "ai_model",
            label: __("AI Model"),
            fieldtype: "MultiSelectList",
            get_data: (txt) => make_options(AI_MODELS, txt),
        },
        {
            fieldname: "pdf_processor",
            label: __("PDF Processor"),
            fieldtype: "MultiSelectList",
            get_data: (txt) => make_options(PDF_PROCESSORS, txt),
        },
        {
            fieldname: "include_disabled_datasets",
            label: __("Include Disabled Datasets"),
            fieldtype: "Check",
            default: 0,
        },
        {
            fieldname: "is_multiple_files",
            label: __("Multiple Files Only"),
            fieldtype: "Check",
            default: 0,
        },
    ],
};
+
// Derive the hidden party_type filter from the chosen transaction type
// (Sales Order -> Customer, Expense -> Supplier, anything else -> blank).
function set_party_type(report) {
    const txn_type = report.get_filter_value("transaction_type");
    report.set_filter_value("party_type", PARTY_TYPE_MAP[txn_type] || "");
}
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.json b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.json
new file mode 100644
index 0000000..75401e4
--- /dev/null
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.json
@@ -0,0 +1,28 @@
+{
+ "add_total_row": 0,
+ "add_translate_data": 0,
+ "columns": [],
+ "creation": "2026-03-24 20:26:18.136226",
+ "disabled": 0,
+ "docstatus": 0,
+ "doctype": "Report",
+ "filters": [],
+ "idx": 0,
+ "is_standard": "Yes",
+ "letter_head": null,
+ "modified": "2026-03-25 06:20:26.780337",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Transaction Parser Accuracy Analysis",
+ "owner": "Administrator",
+ "prepared_report": 0,
+ "ref_doctype": "Parser Benchmark Log",
+ "report_name": "Transaction Parser Accuracy Analysis",
+ "report_type": "Script Report",
+ "roles": [
+ {
+ "role": "System Manager"
+ }
+ ],
+ "timeout": 0
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py
new file mode 100644
index 0000000..50e34f2
--- /dev/null
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py
@@ -0,0 +1,485 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
# TODO: Refactor and add tests
# TODO: Show which keys are getting mismatched
# TODO: Create another report comparing results across commits
+
+from collections import defaultdict
+from enum import StrEnum
+
+import frappe
+from frappe import _
+from frappe.query_builder.functions import Coalesce
+
+PARTY_TYPE_MAP = {
+ "Sales Order": "Customer",
+ "Expense": "Supplier",
+}
+
+# Sorting order for child rows within each party group
+_AI_MODEL_ORDER = {
+ "OpenAI gpt-5": 0,
+ "OpenAI gpt-5-mini": 1,
+ "OpenAI gpt-4o": 2,
+ "OpenAI gpt-4o-mini": 3,
+ "Google Gemini Pro-2.5": 4,
+ "Google Gemini Flash-2.5": 5,
+ "DeepSeek Reasoner": 6,
+ "DeepSeek Chat": 7,
+}
+
+_PDF_PROCESSOR_ORDER = {
+ "OCRMyPDF": 0,
+ "Docling": 1,
+}
+
+
class Col(StrEnum):
    """Column fieldnames — single source of truth for the report.

    Members are StrEnum, so each compares equal to its string value and can
    be used directly as a dict key or a report column fieldname.
    """

    # Party grouping columns
    PARTY = "party"
    PARTY_NAME = "party_name"
    # Headline metric
    ACCURACY_SCORE = "accuracy_score"
    # Benchmark configuration
    AI_MODEL = "ai_model"
    PDF_PROCESSOR = "pdf_processor"
    # Timing / memory metrics
    FILE_PARSE_TIME = "file_parse_time"
    FILE_PARSE_MEMORY = "file_parse_memory"
    AI_PARSE_TIME = "ai_parse_time"
    TOTAL_TIME = "total_time"
    # Cost / token metrics
    TOTAL_COST = "total_cost"
    PROMPT_TOKENS = "prompt_tokens"
    COMPLETION_TOKENS = "completion_tokens"
    TOTAL_TOKENS = "total_tokens"
    CURRENCY = "currency"
    DATASET = "dataset"
    RUN_COUNT = "run_count"
    # NOTE(review): KEY_SCORES does not appear in columns or rows below —
    # per-key scores use dynamic "key_<name>" fieldnames instead; confirm
    # whether this member is still needed.
    KEY_SCORES = "key_scores"
    LOG_NAMES = "log_names"
+
+
# Fields averaged in party-group summary rows
_AVG_FIELDS = (
    Col.ACCURACY_SCORE,
    Col.FILE_PARSE_TIME,
    Col.FILE_PARSE_MEMORY,
    Col.AI_PARSE_TIME,
    Col.TOTAL_TIME,
    Col.PROMPT_TOKENS,
    Col.COMPLETION_TOKENS,
    Col.TOTAL_TOKENS,
)

# Fields summed in party-group summary rows
# NOTE(review): despite the name, _aggregate_by_config divides these by the
# run count (average per run); only _group_row actually sums them. Confirm
# which behavior is intended for repeated runs of the same config.
_SUM_FIELDS = (Col.TOTAL_COST,)
+
+
def execute(filters=None):
    """Report entry point; returns (columns, data) from the report class."""
    return AccuracyAnalysisReport(frappe._dict(filters or {})).run()
+
+
class AccuracyAnalysisReport:
    """Accuracy/cost/timing analysis per (dataset, AI model, PDF processor).

    Pipeline (see ``run``):
      1. Fetch completed benchmark logs joined with their datasets.
      2. Build one row per log, attaching per-key accuracy details.
      3. Average repeated runs of the same configuration into one row.
      4. Group rows into a tree by party (summary at indent 0, configs at 1).
    """

    def __init__(self, filters: frappe._dict):
        self.filters = filters
        # Derive party_type from transaction_type before queries use it.
        self._set_party_type()
        # NOTE(review): hardcoded True, so the flat (non-grouped) branch in
        # run() is currently unreachable — confirm whether a filter should
        # control this.
        self.group_by_party = True

    def run(self):
        """Return (columns, data) for the script report."""
        logs = self._fetch_logs()
        score_details_map = self._fetch_score_details([r.log_name for r in logs])

        self.data = [self._build_row(r, score_details_map) for r in logs]
        self._aggregate_by_config()

        if self.group_by_party:
            self._group_by_party()

        # discover all unique key names for dynamic columns
        all_keys = dict.fromkeys(
            k for row in self.data for k in (row.get("_key_accuracies") or {})
        )

        # strip internal keys before sending to client
        for row in self.data:
            row.pop("_key_accuracies", None)

        return self._get_columns(list(all_keys)), self.data

    # ── Columns ──────────────────────────────────────────────────────

    def _get_columns(self, key_names=None):
        """Static columns, then one Percent column per score-detail key,
        then the trailing Logs column (rendered as a link client-side)."""
        columns = [
            {
                "fieldname": Col.PARTY,
                "label": _("Party"),
                "fieldtype": "Data",
                "width": 200,
            },
            {
                "fieldname": Col.PARTY_NAME,
                "label": _("Party Name"),
                "fieldtype": "Data",
                "width": 200,
            },
            {
                "fieldname": Col.DATASET,
                "label": _("Dataset"),
                "fieldtype": "Link",
                "options": "Parser Benchmark Dataset",
                "width": 160,
            },
            {
                "fieldname": Col.RUN_COUNT,
                "label": _("Runs"),
                "fieldtype": "Int",
                "width": 70,
            },
            {
                "fieldname": Col.ACCURACY_SCORE,
                "label": _("Accuracy (%)"),
                "fieldtype": "Percent",
                "width": 120,
            },
            {
                "fieldname": Col.AI_MODEL,
                "label": _("AI Model"),
                "fieldtype": "Data",
                "width": 180,
            },
            {
                "fieldname": Col.PDF_PROCESSOR,
                "label": _("Processor"),
                "fieldtype": "Data",
                "width": 110,
            },
            {
                "fieldname": Col.FILE_PARSE_TIME,
                "label": _("File Parse (s)"),
                "fieldtype": "Float",
                "width": 120,
                "precision": 2,
            },
            {
                "fieldname": Col.FILE_PARSE_MEMORY,
                "label": _("Memory (MB)"),
                "fieldtype": "Float",
                "width": 110,
                "precision": 2,
            },
            {
                "fieldname": Col.AI_PARSE_TIME,
                "label": _("AI Parse (s)"),
                "fieldtype": "Float",
                "width": 110,
                "precision": 2,
            },
            {
                "fieldname": Col.TOTAL_TIME,
                "label": _("Total (s)"),
                "fieldtype": "Float",
                "width": 100,
                "precision": 2,
            },
            {
                # Currency symbol taken from each row's `currency` field
                "fieldname": Col.TOTAL_COST,
                "label": _("Total Cost"),
                "fieldtype": "Currency",
                "width": 110,
                "options": Col.CURRENCY,
            },
            {
                "fieldname": Col.PROMPT_TOKENS,
                "label": _("Prompt Tokens"),
                "fieldtype": "Int",
                "width": 120,
            },
            {
                "fieldname": Col.COMPLETION_TOKENS,
                "label": _("Compl. Tokens"),
                "fieldtype": "Int",
                "width": 120,
            },
            {
                "fieldname": Col.TOTAL_TOKENS,
                "label": _("Total Tokens"),
                "fieldtype": "Int",
                "width": 110,
            },
        ]

        # dynamic per-key accuracy columns
        for key in key_names or []:
            columns.append(
                {
                    "fieldname": f"key_{key}",
                    "label": _(key.replace("_", " ").title() + " (%)"),
                    "fieldtype": "Percent",
                    "width": 120,
                }
            )

        columns.append(
            {
                "fieldname": Col.LOG_NAMES,
                "label": _("Logs"),
                "fieldtype": "Data",
                "width": 100,
            }
        )

        return columns

    # ── Query ─────────────────────────────────────────────────────

    def _fetch_logs(self):
        """Completed logs joined with submitted datasets; party display name
        resolved from Customer or Supplier depending on the dataset's
        party_type, falling back to the raw party id."""
        log = frappe.qb.DocType("Parser Benchmark Log")
        ds = frappe.qb.DocType("Parser Benchmark Dataset")
        cust = frappe.qb.DocType("Customer")
        supp = frappe.qb.DocType("Supplier")

        query = (
            frappe.qb.from_(log)
            .join(ds)
            .on(log.dataset == ds.name)
            .left_join(cust)
            .on((ds.party_type == "Customer") & (ds.party == cust.name))
            .left_join(supp)
            .on((ds.party_type == "Supplier") & (ds.party == supp.name))
            .select(
                log.name.as_("log_name"),
                log.ai_model,
                log.pdf_processor,
                log.accuracy_score,
                log.file_parse_time,
                log.file_parse_memory,
                log.ai_parse_time,
                log.total_time,
                log.total_cost,
                log.prompt_tokens,
                log.completion_tokens,
                log.total_tokens,
                log.currency,
                log.dataset,
                ds.party,
                Coalesce(cust.customer_name, supp.supplier_name, ds.party).as_(
                    "party_name"
                ),
            )
            .where(log.status == "Completed")
            .where(ds.docstatus == 1)
            .orderby(ds.party, log.ai_model)
        )

        if not self.filters.get("include_disabled_datasets"):
            query = query.where(ds.enabled == 1)

        if self.filters.get("is_multiple_files"):
            query = query.where(ds.is_multiple_files == 1)

        # exact-match filters
        for column, key in (
            (ds.company, "company"),
            (ds.transaction_type, "transaction_type"),
            (ds.party_type, "party_type"),
            (ds.party, "party"),
        ):
            if self.filters.get(key):
                query = query.where(column == self.filters[key])

        # multi-select IN filters
        for column, key in (
            (log.ai_model, "ai_model"),
            (log.pdf_processor, "pdf_processor"),
        ):
            values = self.filters.get(key)
            if values:
                items = values if isinstance(values, list) else [values]
                query = query.where(column.isin(items))

        return query.run(as_dict=True)

    def _fetch_score_details(self, log_names: list[str]) -> dict[str, list[dict]]:
        """Fetch score_details child rows for all logs at once."""
        if not log_names:
            return {}

        sd = frappe.qb.DocType("Parser Benchmark Score Detail")
        rows = (
            frappe.qb.from_(sd)
            .select(sd.parent, sd.key, sd.matched, sd.total, sd.accuracy)
            .where(sd.parent.isin(log_names))
            .orderby(sd.idx)
            .run(as_dict=True)
        )

        # Map parent log name → its child score rows (in idx order).
        details_map: dict[str, list[dict]] = defaultdict(list)
        for row in rows:
            details_map[row.parent].append(row)

        return details_map

    # ── Helpers ──────────────────────────────────────────────────────

    def _set_party_type(self):
        """Derive party_type from transaction_type when not explicitly set."""
        transaction_type = self.filters.get("transaction_type")
        if transaction_type and not self.filters.get("party_type"):
            self.filters["party_type"] = PARTY_TYPE_MAP.get(transaction_type)

    def _build_row(self, r, score_details_map):
        """Build a single detail row from a log record."""
        details = score_details_map.get(r.log_name, [])
        key_accuracies = {d["key"]: d["accuracy"] for d in details}

        row = {
            Col.PARTY: r.party or _("No Party"),
            Col.PARTY_NAME: r.party_name or "",
            Col.ACCURACY_SCORE: r.accuracy_score,
            Col.AI_MODEL: r.ai_model,
            Col.PDF_PROCESSOR: r.pdf_processor,
            Col.DATASET: r.dataset,
            Col.FILE_PARSE_TIME: r.file_parse_time,
            Col.FILE_PARSE_MEMORY: r.file_parse_memory,
            Col.AI_PARSE_TIME: r.ai_parse_time,
            Col.TOTAL_TIME: r.total_time,
            Col.TOTAL_COST: r.total_cost,
            Col.PROMPT_TOKENS: r.prompt_tokens,
            Col.COMPLETION_TOKENS: r.completion_tokens,
            Col.TOTAL_TOKENS: r.total_tokens,
            Col.CURRENCY: r.currency,
            Col.LOG_NAMES: r.log_name,
            # internal: stripped in run() before returning to the client
            "_key_accuracies": key_accuracies,
        }

        # per-key accuracy as separate fields
        for k, v in key_accuracies.items():
            row[f"key_{k}"] = v

        return row

    # ── Aggregation ──────────────────────────────────────────────────

    def _aggregate_by_config(self):
        """Collapse multiple runs of the same config into one averaged row."""
        if not self.data:
            return

        # Group by (dataset, model, processor); processor may be empty.
        groups: dict[tuple, list[dict]] = defaultdict(list)
        for row in self.data:
            key = (
                row.get(Col.DATASET),
                row.get(Col.AI_MODEL),
                row.get(Col.PDF_PROCESSOR) or "",
            )
            groups[key].append(row)

        aggregated = []
        for _key, rows in groups.items():
            count = len(rows)
            # Identity fields are taken from the first row of the group.
            agg = {
                Col.PARTY: rows[0].get(Col.PARTY),
                Col.PARTY_NAME: rows[0].get(Col.PARTY_NAME),
                Col.AI_MODEL: rows[0].get(Col.AI_MODEL),
                Col.PDF_PROCESSOR: rows[0].get(Col.PDF_PROCESSOR),
                Col.CURRENCY: rows[0].get(Col.CURRENCY),
                Col.RUN_COUNT: count,
            }

            agg[Col.DATASET] = rows[0].get(Col.DATASET, "")

            # collect all log names used
            agg[Col.LOG_NAMES] = ",".join(
                r.get(Col.LOG_NAMES) for r in rows if r.get(Col.LOG_NAMES)
            )

            # Missing metric values are treated as 0 before averaging.
            for field in _AVG_FIELDS:
                vals = [r.get(field) or 0 for r in rows]
                agg[field] = round(sum(vals) / count, 2) if count else 0

            for field in _SUM_FIELDS:
                # NOTE(review): divides by count, i.e. average cost per run,
                # despite the _SUM_FIELDS name (contrast _group_row, which
                # sums). Confirm which is intended.
                agg[field] = round(sum(r.get(field) or 0 for r in rows) / count, 6)

            # aggregate per-key accuracies
            all_key_accs: dict[str, list[float]] = defaultdict(list)
            for r in rows:
                for k, v in r.get("_key_accuracies", {}).items():
                    all_key_accs[k].append(v or 0)

            avg_key_accs = {
                k: round(sum(v) / len(v), 1) for k, v in all_key_accs.items()
            }
            agg["_key_accuracies"] = avg_key_accs

            for k, v in avg_key_accs.items():
                agg[f"key_{k}"] = v

            aggregated.append(agg)

        self.data = aggregated

    # ── Grouping ─────────────────────────────────────────────────────

    def _group_by_party(self):
        """Group data by party, creating tree view with indent levels."""
        if not self.data:
            return

        grouped = defaultdict(list)
        for row in self.data:
            party = row.get(Col.PARTY) or _("No Party")
            # child rows sit under the party summary row
            row["indent"] = 1
            grouped[party].append(row)

        tree_data = []
        for party, rows in grouped.items():
            rows.sort(key=self._sort_key)
            # summary row first, then its sorted children
            tree_data.append(self._group_row(party, rows))
            tree_data.extend(rows)

        self.data = tree_data

    @staticmethod
    def _sort_key(row):
        """Sort key for child rows: AI Model → PDF Processor."""
        # Unknown models/processors (rank 99) sort after the known ones.
        return (
            _AI_MODEL_ORDER.get(row.get(Col.AI_MODEL), 99),
            _PDF_PROCESSOR_ORDER.get(row.get(Col.PDF_PROCESSOR), 99),
        )

    def _group_row(self, party, rows):
        """Aggregated summary row for a party group (indent 0)."""
        count = len(rows)
        row = {
            Col.PARTY: party,
            Col.PARTY_NAME: rows[0].get(Col.PARTY_NAME),
            Col.CURRENCY: rows[0].get(Col.CURRENCY),
            Col.RUN_COUNT: sum(r.get(Col.RUN_COUNT, 1) for r in rows),
            "indent": 0,
        }

        # collect all log names from children
        row[Col.LOG_NAMES] = ",".join(
            r.get(Col.LOG_NAMES) for r in rows if r.get(Col.LOG_NAMES)
        )

        # Average of the (already per-config-averaged) child values —
        # an unweighted mean across configs, not across individual runs.
        for field in _AVG_FIELDS:
            vals = [r.get(field) or 0 for r in rows]
            row[field] = round(sum(vals) / count, 2) if count else 0

        for field in _SUM_FIELDS:
            row[field] = round(sum(r.get(field) or 0 for r in rows), 6)

        # aggregate per-key accuracies across all child rows
        all_key_accs: dict[str, list[float]] = defaultdict(list)
        for r in rows:
            for k, v in r.get("_key_accuracies", {}).items():
                all_key_accs[k].append(v or 0)

        avg_key_accs = {k: round(sum(v) / len(v), 1) for k, v in all_key_accs.items()}
        for k, v in avg_key_accs.items():
            row[f"key_{k}"] = v

        row["_key_accuracies"] = avg_key_accs

        return row
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/__init__.py b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js
new file mode 100644
index 0000000..bd831ca
--- /dev/null
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js
@@ -0,0 +1,114 @@
+// Copyright (c) 2026, Resilient Tech and contributors
+// For license information, please see license.txt
+
// AI model display names offered in the "AI Model" multi-select filter.
const AI_MODELS = [
	"DeepSeek Chat",
	"DeepSeek Reasoner",
	"OpenAI gpt-4o",
	"OpenAI gpt-4o-mini",
	"OpenAI gpt-5",
	"OpenAI gpt-5-mini",
	"Google Gemini Pro-2.5",
	"Google Gemini Flash-2.5",
];

// PDF processors offered in the "PDF Processor" multi-select filter.
const PDF_PROCESSORS = ["OCRMyPDF", "Docling"];

// Transaction type → party DocType for the hidden party_type filter
// (see set_party_type below).
const PARTY_TYPE_MAP = {
	"Sales Order": "Customer",
	Expense: "Supplier",
};
+
function make_options(items, txt) {
	// Build MultiSelectList options from `items`, case-insensitively
	// filtered by the typed text (no text → all items).
	const needle = (txt || "").toLowerCase();
	const options = [];
	for (const item of items) {
		if (!needle || item.toLowerCase().includes(needle)) {
			options.push({ value: item, description: "" });
		}
	}
	return options;
}
+
+frappe.query_reports["Transaction Parser Version Comparison"] = {
+ tree: true,
+ initial_depth: 1,
+
+ onload(report) {
+ set_party_type(report);
+ },
+
+ formatter(value, row, column, data, default_formatter) {
+ if (column.fieldname === "log_names" && value) {
+ const names = value.split(",").filter(Boolean);
+ if (!names.length) return value;
+
+ const filters = frappe.utils.get_url_from_dict({
+ name: JSON.stringify(["in", names]),
+ });
+ const url = `/app/parser-benchmark-log?${filters}`;
+ return `See Logs (${names.length})`;
+ }
+ return default_formatter(value, row, column, data);
+ },
+
+ filters: [
+ {
+ fieldname: "company",
+ label: __("Company"),
+ fieldtype: "Link",
+ options: "Company",
+ reqd: 1,
+ default: frappe.defaults.get_user_default("Company"),
+ },
+ {
+ fieldname: "transaction_type",
+ label: __("Transaction Type"),
+ fieldtype: "Select",
+ options: "\nSales Order\nExpense",
+ reqd: 1,
+ default: "Sales Order",
+ on_change() {
+ set_party_type(frappe.query_report);
+ },
+ },
+ {
+ fieldname: "party_type",
+ label: __("Party Type"),
+ fieldtype: "Link",
+ options: "DocType",
+ hidden: 1,
+ },
+ {
+ fieldname: "party",
+ label: __("Party"),
+ fieldtype: "Dynamic Link",
+ options: "party_type",
+ },
+ {
+ fieldname: "ai_model",
+ label: __("AI Model"),
+ fieldtype: "MultiSelectList",
+ get_data: (txt) => make_options(AI_MODELS, txt),
+ },
+ {
+ fieldname: "pdf_processor",
+ label: __("PDF Processor"),
+ fieldtype: "MultiSelectList",
+ get_data: (txt) => make_options(PDF_PROCESSORS, txt),
+ },
+ {
+ fieldname: "include_disabled_datasets",
+ label: __("Include Disabled Datasets"),
+ fieldtype: "Check",
+ default: 0,
+ },
+ {
+ fieldname: "is_multiple_files",
+ label: __("Multiple Files Only"),
+ fieldtype: "Check",
+ default: 0,
+ },
+ ],
+};
+
function set_party_type(report) {
	// Derive the hidden party_type filter from the chosen transaction
	// type; falls back to an empty string for unmapped types.
	const selected = report.get_filter_value("transaction_type");
	report.set_filter_value("party_type", PARTY_TYPE_MAP[selected] || "");
}
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.json b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.json
new file mode 100644
index 0000000..a0fbf68
--- /dev/null
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.json
@@ -0,0 +1,28 @@
+{
+ "add_total_row": 0,
+ "add_translate_data": 0,
+ "columns": [],
+ "creation": "2026-03-30 00:00:00",
+ "disabled": 0,
+ "docstatus": 0,
+ "doctype": "Report",
+ "filters": [],
+ "idx": 0,
+ "is_standard": "Yes",
+ "letter_head": null,
+ "modified": "2026-03-30 09:38:02.128996",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Transaction Parser Version Comparison",
+ "owner": "Administrator",
+ "prepared_report": 0,
+ "ref_doctype": "Parser Benchmark Log",
+ "report_name": "Transaction Parser Version Comparison",
+ "report_type": "Script Report",
+ "roles": [
+ {
+ "role": "System Manager"
+ }
+ ],
+ "timeout": 0
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py
new file mode 100644
index 0000000..5af7dda
--- /dev/null
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py
@@ -0,0 +1,403 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from collections import defaultdict
+from enum import StrEnum
+
+import frappe
+from frappe import _
+from frappe.query_builder.functions import Coalesce
+
+PARTY_TYPE_MAP = {
+ "Sales Order": "Customer",
+ "Expense": "Supplier",
+}
+
+# Sorting order for child rows within each party group
+_AI_MODEL_ORDER = {
+ "OpenAI gpt-5": 0,
+ "OpenAI gpt-5-mini": 1,
+ "OpenAI gpt-4o": 2,
+ "OpenAI gpt-4o-mini": 3,
+ "Google Gemini Pro-2.5": 4,
+ "Google Gemini Flash-2.5": 5,
+ "DeepSeek Reasoner": 6,
+ "DeepSeek Chat": 7,
+}
+
+_PDF_PROCESSOR_ORDER = {
+ "OCRMyPDF": 0,
+ "Docling": 1,
+}
+
+
class Col(StrEnum):
    """Column fieldnames — single source of truth for the report.

    Members are StrEnum, so each compares equal to its string value and can
    be used directly as a dict key or a report column fieldname.
    """

    # Party grouping columns
    PARTY = "party"
    PARTY_NAME = "party_name"
    # Benchmark configuration
    DATASET = "dataset"
    AI_MODEL = "ai_model"
    PDF_PROCESSOR = "pdf_processor"
    # Version identification (short hash / first line of message)
    COMMIT_HASH = "commit_hash"
    COMMIT_MESSAGE = "commit_message"
    ACCURACY_SCORE = "accuracy_score"
    # NOTE(review): KEY_SCORES is not referenced in columns or rows below —
    # per-key scores use dynamic "key_<name>" fieldnames instead; confirm
    # whether this member is still needed.
    KEY_SCORES = "key_scores"
    RUN_COUNT = "run_count"
    LOG_NAMES = "log_names"
+
+
def execute(filters=None):
    """Report entry point; returns (columns, data) from the report class."""
    return VersionComparisonReport(frappe._dict(filters or {})).run()
+
+
class VersionComparisonReport:
    """Compare parser accuracy across code versions (commits).

    Pipeline (see ``run``):
      1. Fetch completed logs that carry a commit hash.
      2. Build one row per log (hash shortened to 7 chars, message to its
         first line, max 80 chars).
      3. Average repeated runs of the same (dataset, model, processor,
         commit) into one row.
      4. Group rows into a tree by party (summary at indent 0, children 1).
    """

    def __init__(self, filters: frappe._dict):
        self.filters = filters
        # Derive party_type from transaction_type before queries use it.
        self._set_party_type()

    def run(self):
        """Return (columns, data) for the script report."""
        logs = self._fetch_logs()
        if not logs:
            # No matching logs: static columns, empty data.
            return self._get_columns(), []

        score_details_map = self._fetch_score_details([r.log_name for r in logs])

        self.data = [self._build_row(r, score_details_map) for r in logs]
        self._aggregate_by_config()
        self._group_by_party()

        # discover all unique key names for dynamic columns
        all_keys = dict.fromkeys(
            k for row in self.data for k in (row.get("_key_accuracies") or {})
        )

        # strip internal keys
        for row in self.data:
            row.pop("_key_accuracies", None)

        return self._get_columns(list(all_keys)), self.data

    # ── Columns ──────────────────────────────────────────────────────

    def _get_columns(self, key_names=None):
        """Static columns, then one Percent column per score-detail key,
        then the trailing Logs column (rendered as a link client-side)."""
        columns = [
            {
                "fieldname": Col.PARTY,
                "label": _("Party"),
                "fieldtype": "Data",
                "width": 200,
            },
            {
                "fieldname": Col.PARTY_NAME,
                "label": _("Party Name"),
                "fieldtype": "Data",
                "width": 200,
            },
            {
                "fieldname": Col.DATASET,
                "label": _("Dataset"),
                "fieldtype": "Link",
                "options": "Parser Benchmark Dataset",
                "width": 160,
            },
            {
                "fieldname": Col.AI_MODEL,
                "label": _("AI Model"),
                "fieldtype": "Data",
                "width": 180,
            },
            {
                "fieldname": Col.PDF_PROCESSOR,
                "label": _("Processor"),
                "fieldtype": "Data",
                "width": 110,
            },
            {
                "fieldname": Col.COMMIT_HASH,
                "label": _("Commit"),
                "fieldtype": "Data",
                "width": 100,
            },
            {
                "fieldname": Col.COMMIT_MESSAGE,
                "label": _("Commit Message"),
                "fieldtype": "Data",
                "width": 250,
            },
            {
                "fieldname": Col.RUN_COUNT,
                "label": _("Runs"),
                "fieldtype": "Int",
                "width": 60,
            },
            {
                "fieldname": Col.ACCURACY_SCORE,
                "label": _("Accuracy (%)"),
                "fieldtype": "Percent",
                "width": 120,
            },
        ]

        # dynamic per-key accuracy columns
        for key in key_names or []:
            columns.append(
                {
                    "fieldname": f"key_{key}",
                    "label": _(key.replace("_", " ").title() + " (%)"),
                    "fieldtype": "Percent",
                    "width": 120,
                }
            )

        columns.append(
            {
                "fieldname": Col.LOG_NAMES,
                "label": _("Logs"),
                "fieldtype": "Data",
                "width": 100,
            }
        )

        return columns

    # ── Query ────────────────────────────────────────────────────────

    def _fetch_logs(self):
        """Completed logs with a non-empty commit_hash, joined with
        submitted datasets; party display name resolved from Customer or
        Supplier per the dataset's party_type."""
        log = frappe.qb.DocType("Parser Benchmark Log")
        ds = frappe.qb.DocType("Parser Benchmark Dataset")
        cust = frappe.qb.DocType("Customer")
        supp = frappe.qb.DocType("Supplier")

        query = (
            frappe.qb.from_(log)
            .join(ds)
            .on(log.dataset == ds.name)
            .left_join(cust)
            .on((ds.party_type == "Customer") & (ds.party == cust.name))
            .left_join(supp)
            .on((ds.party_type == "Supplier") & (ds.party == supp.name))
            .select(
                log.name.as_("log_name"),
                log.ai_model,
                log.pdf_processor,
                log.accuracy_score,
                log.dataset,
                log.commit_hash,
                log.commit_message,
                ds.party,
                Coalesce(cust.customer_name, supp.supplier_name, ds.party).as_(
                    "party_name"
                ),
            )
            .where(log.status == "Completed")
            .where(ds.docstatus == 1)
            # logs without a commit hash (NULL or "") cannot be compared
            .where(Coalesce(log.commit_hash, "") != "")
            .orderby(ds.party, log.ai_model)
        )

        if not self.filters.get("include_disabled_datasets"):
            query = query.where(ds.enabled == 1)

        if self.filters.get("is_multiple_files"):
            query = query.where(ds.is_multiple_files == 1)

        # exact-match filters
        for column, key in (
            (ds.company, "company"),
            (ds.transaction_type, "transaction_type"),
            (ds.party_type, "party_type"),
            (ds.party, "party"),
        ):
            if self.filters.get(key):
                query = query.where(column == self.filters[key])

        # multi-select IN filters
        for column, key in (
            (log.ai_model, "ai_model"),
            (log.pdf_processor, "pdf_processor"),
        ):
            values = self.filters.get(key)
            if values:
                items = values if isinstance(values, list) else [values]
                query = query.where(column.isin(items))

        return query.run(as_dict=True)

    def _fetch_score_details(self, log_names: list[str]) -> dict[str, list[dict]]:
        """Fetch score_details child rows for all logs at once."""
        if not log_names:
            return {}

        sd = frappe.qb.DocType("Parser Benchmark Score Detail")
        rows = (
            frappe.qb.from_(sd)
            .select(sd.parent, sd.key, sd.matched, sd.total, sd.accuracy)
            .where(sd.parent.isin(log_names))
            .orderby(sd.idx)
            .run(as_dict=True)
        )

        # Map parent log name → its child score rows (in idx order).
        details_map: dict[str, list[dict]] = defaultdict(list)
        for row in rows:
            details_map[row.parent].append(row)

        return details_map

    # ── Helpers ──────────────────────────────────────────────────────

    def _set_party_type(self):
        """Derive party_type from transaction_type when not explicitly set."""
        transaction_type = self.filters.get("transaction_type")
        if transaction_type and not self.filters.get("party_type"):
            self.filters["party_type"] = PARTY_TYPE_MAP.get(transaction_type)

    def _build_row(self, r, score_details_map):
        """Build a single detail row from a log record."""
        details = score_details_map.get(r.log_name, [])
        key_accuracies = {d["key"]: d["accuracy"] for d in details}

        # Shorten for display: 7-char hash, first line of message (≤80 chars).
        short_hash = (r.commit_hash or "")[:7]
        commit_msg = (r.commit_message or "").split("\n")[0][:80]

        row = {
            Col.PARTY: r.party or _("No Party"),
            Col.PARTY_NAME: r.party_name or "",
            Col.DATASET: r.dataset,
            Col.AI_MODEL: r.ai_model,
            Col.PDF_PROCESSOR: r.pdf_processor,
            Col.COMMIT_HASH: short_hash,
            Col.COMMIT_MESSAGE: commit_msg,
            Col.ACCURACY_SCORE: r.accuracy_score,
            Col.LOG_NAMES: r.log_name,
            Col.RUN_COUNT: 1,
            # internal: stripped in run() before returning to the client
            "_key_accuracies": key_accuracies,
        }

        # per-key accuracy as separate fields
        for k, v in key_accuracies.items():
            row[f"key_{k}"] = v

        return row

    # ── Aggregation ──────────────────────────────────────────────────

    def _aggregate_by_config(self):
        """Collapse multiple runs of same config + commit into one averaged row."""
        if not self.data:
            return

        # Group by (dataset, model, processor, short commit hash).
        groups: dict[tuple, list[dict]] = defaultdict(list)
        for row in self.data:
            key = (
                row.get(Col.DATASET),
                row.get(Col.AI_MODEL),
                row.get(Col.PDF_PROCESSOR) or "",
                row.get(Col.COMMIT_HASH),
            )
            groups[key].append(row)

        aggregated = []
        for _key, rows in groups.items():
            count = len(rows)
            # Identity fields are taken from the first row of the group.
            agg = {
                Col.PARTY: rows[0].get(Col.PARTY),
                Col.PARTY_NAME: rows[0].get(Col.PARTY_NAME),
                Col.DATASET: rows[0].get(Col.DATASET),
                Col.AI_MODEL: rows[0].get(Col.AI_MODEL),
                Col.PDF_PROCESSOR: rows[0].get(Col.PDF_PROCESSOR),
                Col.COMMIT_HASH: rows[0].get(Col.COMMIT_HASH),
                Col.COMMIT_MESSAGE: rows[0].get(Col.COMMIT_MESSAGE),
                Col.RUN_COUNT: count,
            }

            # collect all log names used
            agg[Col.LOG_NAMES] = ",".join(
                r.get(Col.LOG_NAMES) for r in rows if r.get(Col.LOG_NAMES)
            )

            # average accuracy (missing values treated as 0)
            vals = [r.get(Col.ACCURACY_SCORE) or 0 for r in rows]
            agg[Col.ACCURACY_SCORE] = round(sum(vals) / count, 2) if count else 0

            # aggregate per-key accuracies
            all_key_accs: dict[str, list[float]] = defaultdict(list)
            for r in rows:
                for k, v in r.get("_key_accuracies", {}).items():
                    all_key_accs[k].append(v or 0)

            avg_key_accs = {
                k: round(sum(v) / len(v), 1) for k, v in all_key_accs.items()
            }
            agg["_key_accuracies"] = avg_key_accs

            for k, v in avg_key_accs.items():
                agg[f"key_{k}"] = v

            aggregated.append(agg)

        self.data = aggregated

    # ── Grouping ─────────────────────────────────────────────────────

    def _group_by_party(self):
        """Group data by party, creating tree view with indent levels."""
        if not self.data:
            return

        grouped = defaultdict(list)
        for row in self.data:
            party = row.get(Col.PARTY) or _("No Party")
            # child rows sit under the party summary row
            row["indent"] = 1
            grouped[party].append(row)

        tree_data = []
        for party, rows in grouped.items():
            rows.sort(key=self._sort_key)
            # summary row first, then its sorted children
            tree_data.append(self._group_row(party, rows))
            tree_data.extend(rows)

        self.data = tree_data

    @staticmethod
    def _sort_key(row):
        """Sort: AI Model → PDF Processor → Commit Hash."""
        # Unknown models/processors (rank 99) sort after the known ones.
        return (
            _AI_MODEL_ORDER.get(row.get(Col.AI_MODEL), 99),
            _PDF_PROCESSOR_ORDER.get(row.get(Col.PDF_PROCESSOR), 99),
            row.get(Col.COMMIT_HASH) or "",
        )

    def _group_row(self, party, rows):
        """Aggregated summary row for a party group (indent 0)."""
        count = len(rows)
        row = {
            Col.PARTY: party,
            Col.PARTY_NAME: rows[0].get(Col.PARTY_NAME),
            Col.RUN_COUNT: sum(r.get(Col.RUN_COUNT, 1) for r in rows),
            "indent": 0,
        }

        # collect all log names from children
        row[Col.LOG_NAMES] = ",".join(
            r.get(Col.LOG_NAMES) for r in rows if r.get(Col.LOG_NAMES)
        )

        # average accuracy across all child rows — an unweighted mean over
        # (already per-config-averaged) children, not individual runs
        vals = [r.get(Col.ACCURACY_SCORE) or 0 for r in rows]
        row[Col.ACCURACY_SCORE] = round(sum(vals) / count, 2) if count else 0

        # aggregate per-key accuracies
        all_key_accs: dict[str, list[float]] = defaultdict(list)
        for r in rows:
            for k, v in r.get("_key_accuracies", {}).items():
                all_key_accs[k].append(v or 0)

        avg_key_accs = {k: round(sum(v) / len(v), 1) for k, v in all_key_accs.items()}
        for k, v in avg_key_accs.items():
            row[f"key_{k}"] = v

        row["_key_accuracies"] = avg_key_accs

        return row
diff --git a/transaction_parser/parser_benchmark/runner.py b/transaction_parser/parser_benchmark/runner.py
new file mode 100644
index 0000000..ff93e7c
--- /dev/null
+++ b/transaction_parser/parser_benchmark/runner.py
@@ -0,0 +1,331 @@
+import tracemalloc
+from timeit import default_timer
+
+import frappe
+from frappe import _
+from frappe.core.doctype.file.file import File
+from frappe.utils import cint, flt
+
+from transaction_parser.parser_benchmark.doctype.parser_benchmark_dataset.parser_benchmark_dataset import (
+ ParserBenchmarkDataset,
+)
+from transaction_parser.parser_benchmark.doctype.parser_benchmark_log.parser_benchmark_log import (
+ ParserBenchmarkLog,
+)
+from transaction_parser.parser_benchmark.doctype.parser_benchmark_settings.parser_benchmark_settings import (
+ ParserBenchmarkSettings,
+)
+from transaction_parser.transaction_parser.ai_integration.parser import AIParser
+from transaction_parser.transaction_parser.controllers import get_controller
+from transaction_parser.transaction_parser.controllers.transaction import Transaction
+from transaction_parser.transaction_parser.utils.file_processor import FileProcessor
+from transaction_parser.transaction_parser.utils.pdf_processor import get_pdf_processor
+from transaction_parser.transaction_parser.utils.response_merger import ResponseMerger
+
+
class BenchmarkRunner:
    """
    Runs a single benchmark for one AI model + PDF processor combination.

    Flow:
    1. File parsing → time, memory, extracted content
    2. AI parsing → time, tokens, cost, AI response
    """

    def __init__(
        self,
        log_name: str,
        dataset: ParserBenchmarkDataset | None = None,
        settings: ParserBenchmarkSettings | None = None,
    ):
        """Load the log plus its dataset and the benchmark settings.

        Args:
            log_name: Name of the Parser Benchmark Log to run.
            dataset: Optional pre-loaded dataset; defaults to the cached
                doc referenced by the log.
            settings: Optional pre-loaded settings single doc.
        """
        self.log: ParserBenchmarkLog = frappe.get_doc("Parser Benchmark Log", log_name)
        self.dataset: ParserBenchmarkDataset = dataset or frappe.get_cached_doc(
            "Parser Benchmark Dataset", self.log.dataset
        )
        self.settings: ParserBenchmarkSettings = settings or frappe.get_cached_doc(
            "Parser Benchmark Settings"
        )
        self.precision = 6  # to get 1-millionth of a token cost

        # for accuracy score
        self.significant_digits = cint(frappe.db.get_default("float_precision")) or 2

    def run(self):
        """Execute the benchmark and persist all results on the log.

        Marks the log "Running", then performs file parsing, AI parsing,
        cost calculation and accuracy scoring. On any exception the log is
        marked "Failed" with the traceback; the total runtime and final
        state are always saved.

        Returns:
            The name of the Parser Benchmark Log.
        """
        self.log.status = "Running"
        self.log.currency = "USD"
        self.log.save(ignore_permissions=True)
        frappe.db.commit()  # nosemgrep -- persist "Running" status before long-running benchmark

        total_start = default_timer()

        try:
            file_docs: list[File] = self._get_file_docs()
            self.controller: Transaction = self._get_controller(file_docs)

            file_contents = self._run_file_parsing(file_docs)
            ai_content = self._run_ai_parsing(file_contents, file_docs)
            self._calculate_cost()
            self._score_response(ai_content)

            self.log.status = "Completed"

        except Exception:
            self.log.status = "Failed"
            self.log.error = frappe.get_traceback()

        finally:
            self.log.total_time = flt(default_timer() - total_start, self.precision)
            self.log.save(ignore_permissions=True)
            frappe.db.commit()  # nosemgrep -- persist final results inside background job

        return self.log.name

    # ── helpers ──────────────────────────────────────────────

    def _get_file_docs(self) -> list[File]:
        """Return the dataset's File docs sorted by type priority.

        Throws (via frappe.throw) when the dataset has no files.
        """
        file_docs = self.dataset.get_file_docs()
        if not file_docs:
            frappe.throw(_("No files in dataset {0}").format(self.dataset.name))

        # sort by file type priority to match the actual parser behavior
        file_docs = self._sort_files_by_priority(file_docs)
        return file_docs

    def _sort_files_by_priority(self, file_docs: list[File]) -> list[File]:
        """Sort files by type priority: xlsx/xls first, then pdf, then csv.

        If no xlsx/xls files exist, csv takes priority over pdf.
        This mirrors the sorting logic in _parse() to ensure consistent file ordering.
        """
        # TODO: Too much code repetition with transaction_parser/transaction_parser/__init__.py. Refactor to centralize file sorting logic.
        file_types = {(f.file_type or "").lower() for f in file_docs}
        has_spreadsheet = file_types & {"xlsx", "xls"}

        if has_spreadsheet:
            FILE_TYPE_PRIORITY = {"xlsx": 0, "xls": 0, "pdf": 1, "csv": 2}
        else:
            FILE_TYPE_PRIORITY = {"csv": 0, "pdf": 1}

        return sorted(
            file_docs,
            # unknown types sort last
            key=lambda f: FILE_TYPE_PRIORITY.get((f.file_type or "").lower(), 99),
        )

    def _get_controller(self, file_docs: list[File]) -> Transaction:
        """Build and initialize the transaction controller for this run."""
        ds = self.dataset
        cls = get_controller(ds.country, ds.transaction_type)

        controller: Transaction = cls(company=ds.company, party=ds.party)
        controller.initialize()
        controller.files = file_docs
        controller.ai_model = self.log.ai_model

        return controller

    def _get_cost_row(self):
        """Return the settings token-cost row for the log's AI model,
        or None when no cost is configured for it."""
        for row in self.settings.token_costs:
            if row.ai_model == self.log.ai_model:
                return row

        return None

    # ── step 1: file parsing ────────────────────────────────

    def _run_file_parsing(self, file_docs: list[File]) -> list[str]:
        """Extract text from every file, recording time and peak memory.

        Sets file_parse_time, file_parse_memory (peak, in MB) and
        file_content on the log; returns one content string per file.
        """
        # TODO: It is assumed that Process One Document Per Communication is enabled
        # to prevent stopping an already running tracemalloc instance
        was_tracing = tracemalloc.is_tracing()
        if not was_tracing:
            tracemalloc.start()

        start = default_timer()
        try:
            contents = []
            for file_doc in file_docs:
                pdf_processor = None
                if file_doc.file_type == "PDF" and self.log.pdf_processor:
                    pdf_processor = get_pdf_processor(self.log.pdf_processor)

                content = FileProcessor().get_content(
                    file_doc,
                    self.dataset.page_limit or None,
                    pdf_processor,
                )
                contents.append(content)
        finally:
            # timing/memory are captured even if extraction raises
            self.log.file_parse_time = flt(default_timer() - start, self.precision)
            _, peak = tracemalloc.get_traced_memory()
            if not was_tracing:
                tracemalloc.stop()
            self.log.file_parse_memory = flt(
                peak / 1024 / 1024, self.precision
            )  # bytes → MB

        self.log.file_content = (
            "\n\n--- Document Separator ---\n\n".join(contents)
            if len(contents) > 1
            else contents[0]
        )
        return contents

    # ── step 2: AI parsing ──────────────────────────────────

    def _run_ai_parsing(self, file_contents: list[str], file_docs: list[File]) -> dict:
        """Dispatch to single- or multi-file AI parsing based on file count."""
        if len(file_contents) == 1:
            return self._run_single_ai_parse(file_contents[0], file_docs[0].name)

        return self._run_multi_ai_parse(file_contents, file_docs)

    def _run_single_ai_parse(self, file_content: str, file_name: str) -> dict:
        """Parse a single file with AI.

        Records parse time, token usage and the raw AI response on the log.
        """
        parser = AIParser(self.log.ai_model)

        start = default_timer()
        ai_content = parser.parse(
            document_type=self.controller.DOCTYPE,
            document_schema=self.controller.get_schema(),
            document_data=file_content,
            file_doc_name=file_name,
            company=self.dataset.company,
        )
        self.log.ai_parse_time = flt(default_timer() - start, self.precision)

        usage = parser.ai_response.get("usage", {})
        self.log.prompt_tokens = usage.get("prompt_tokens", 0)
        self.log.completion_tokens = usage.get("completion_tokens", 0)
        self.log.total_tokens = usage.get("total_tokens", 0)
        self.log.ai_response = frappe.as_json(ai_content, indent=2)

        return ai_content

    def _run_multi_ai_parse(
        self, file_contents: list[str], file_docs: list[File]
    ) -> dict:
        """Parse multiple files individually with AI and merge responses.

        Mirrors the controller's _parse_multiple_files flow:
        parse each file → merge with ResponseMerger → aggregate tokens.
        """
        total_prompt = 0
        total_completion = 0
        total_tokens_count = 0
        schema = self.controller.get_schema()

        start = default_timer()

        # parse first file
        parser = AIParser(self.log.ai_model)
        response = parser.parse(
            document_type=self.controller.DOCTYPE,
            document_schema=schema,
            document_data=file_contents[0],
            file_doc_name=file_docs[0].name,
            company=self.dataset.company,
        )

        usage = parser.ai_response.get("usage", {})
        total_prompt += usage.get("prompt_tokens", 0)
        total_completion += usage.get("completion_tokens", 0)
        total_tokens_count += usage.get("total_tokens", 0)

        # merge remaining files
        merger = ResponseMerger(
            response,
            schema=schema,
            match_keys=self.controller.get_match_keys(),
        )

        for i, file_content in enumerate(file_contents[1:], 1):
            # stop early once the merged response is already complete
            if merger.is_complete():
                break

            parser = AIParser(self.log.ai_model)
            new_response = parser.parse(
                document_type=self.controller.DOCTYPE,
                document_schema=schema,
                document_data=file_content,
                file_doc_name=file_docs[i].name,
                company=self.dataset.company,
            )

            usage = parser.ai_response.get("usage", {})
            total_prompt += usage.get("prompt_tokens", 0)
            total_completion += usage.get("completion_tokens", 0)
            total_tokens_count += usage.get("total_tokens", 0)

            merger.merge(new_response)

        self.log.ai_parse_time = flt(default_timer() - start, self.precision)
        self.log.prompt_tokens = total_prompt
        self.log.completion_tokens = total_completion
        self.log.total_tokens = total_tokens_count

        ai_content = merger.response
        self.log.ai_response = frappe.as_json(ai_content, indent=2)

        return ai_content

    # ── step 3: cost calculation ────────────────────────────

    def _calculate_cost(self):
        """Compute input/output/total token cost from per-million rates.

        No-op when the settings have no cost row for the log's AI model.
        """
        cost_row = self._get_cost_row()
        if not cost_row:
            return

        self.log.currency = cost_row.currency
        self.log.input_token_cost = cost_row.input_cost_per_million
        self.log.output_token_cost = cost_row.output_cost_per_million

        prompt = self.log.prompt_tokens or 0
        completion = self.log.completion_tokens or 0

        self.log.input_cost = flt(
            prompt * cost_row.input_cost_per_million / 1_000_000, self.precision
        )
        self.log.output_cost = flt(
            completion * cost_row.output_cost_per_million / 1_000_000, self.precision
        )
        self.log.total_cost = flt(
            self.log.input_cost + self.log.output_cost, self.precision
        )

    # ── step 4: accuracy scoring ─────────────────────────────

    def _score_response(self, ai_content: dict):
        """Score the AI response against the dataset's expected fields.

        Stores the overall accuracy and one score-details child row per
        expected key. No-op when the dataset defines no expected fields.
        """
        from transaction_parser.parser_benchmark.scorer import score_response

        expected_fields = self.dataset.expected_fields
        if not expected_fields:
            return

        weights = self._get_key_weights()

        result = score_response(
            expected_fields=[
                {"key": row.key, "expected_json": row.expected_json}
                for row in expected_fields
            ],
            actual=ai_content,
            weights=weights,
            precision=self.significant_digits,
        )

        self.log.accuracy_score = result["overall_accuracy"]

        for detail in result["details"]:
            self.log.append(
                "score_details",
                {
                    "key": detail["key"],
                    "matched": detail["matched"],
                    "total": detail["total"],
                    "accuracy": detail["accuracy"],
                    "mismatches": frappe.as_json(detail["mismatches"], indent=2)
                    if detail["mismatches"]
                    else None,
                },
            )

    def _get_key_weights(self) -> dict[str, float]:
        """Load key weights from Parser Benchmark Settings."""
        return {row.key: row.weight for row in (self.settings.key_weights or [])}
diff --git a/transaction_parser/parser_benchmark/scorer.py b/transaction_parser/parser_benchmark/scorer.py
new file mode 100644
index 0000000..3ee081c
--- /dev/null
+++ b/transaction_parser/parser_benchmark/scorer.py
@@ -0,0 +1,179 @@
+import frappe
+from frappe.utils import flt
+
+
+def _normalize(obj):
+ """Recursively normalize values for comparison.
+
+ - Empty strings → None
+ - Strings → stripped and lowercased
+ - frappe._dict → plain dict
+ """
+ if isinstance(obj, dict):
+ return {k: _normalize(v) for k, v in obj.items()}
+
+ if isinstance(obj, list):
+ return [_normalize(v) for v in obj]
+
+ if obj == "":
+ return None
+
+ if isinstance(obj, str):
+ return obj.strip().lower()
+
+ return obj
+
+
def _compare_scalar(
    expected, actual, path: str, precision: int
) -> tuple[int, int, list]:
    """Compare two scalar (non-dict, non-list) values.

    Numbers are compared after rounding to `precision` digits; everything
    else falls back to string equality (values are already lowercased by
    _normalize). Returns (matched, total, mismatches) with total == 1.
    """
    match = (1, 1, [])
    mismatch = (0, 1, [{"field": path, "expected": expected, "actual": actual}])

    if expected is None or actual is None:
        # both None is a match; exactly one None is a mismatch
        return match if expected is actual else mismatch

    if isinstance(expected, int | float) and isinstance(actual, int | float):
        # numeric comparison with tolerance
        return match if flt(expected, precision) == flt(actual, precision) else mismatch

    return match if str(expected) == str(actual) else mismatch
+
+
def _compare(expected, actual, path: str, precision: int) -> tuple[int, int, list]:
    """Recursively compare expected vs actual, counting leaf field matches.

    Only keys/indices present in `expected` are scored — extra keys in
    `actual` are ignored. Lists are compared index-by-index (order matters);
    a missing actual item compares against None, which marks every leaf of
    the expected item as mismatched.

    Returns (matched, total, mismatches).
    """
    if isinstance(expected, dict):
        matched = total = 0
        mismatches = []

        for key, exp_val in expected.items():
            child_path = f"{path}.{key}" if path else key
            act_val = actual.get(key) if isinstance(actual, dict) else None
            m, t, mm = _compare(exp_val, act_val, child_path, precision)
            matched += m
            total += t
            mismatches.extend(mm)

        return matched, total, mismatches

    if isinstance(expected, list):
        matched = total = 0
        mismatches = []
        actual_list = actual if isinstance(actual, list) else []

        for idx, exp_item in enumerate(expected):
            # out-of-range indices compare against None; the recursion then
            # reports every expected leaf as a mismatch, so no special case
            # is needed here (the original duplicated the identical call in
            # both arms of an if/else on `act_item is None`)
            act_item = actual_list[idx] if idx < len(actual_list) else None
            m, t, mm = _compare(exp_item, act_item, f"{path}[{idx}]", precision)
            matched += m
            total += t
            mismatches.extend(mm)

        return matched, total, mismatches

    # scalar leaf
    return _compare_scalar(expected, actual, path, precision)
+
+
def score_key(expected, actual, key: str, precision: int = 2) -> dict:
    """Score a single top-level key of the AI response.

    Args:
        expected: The expected value (parsed JSON) for this key.
        actual: The actual AI response value for this key.
        key: The key name (used as path prefix in mismatch reports).
        precision: Decimal precision for numeric comparisons.

    Returns:
        {"key": str, "matched": int, "total": int, "accuracy": float, "mismatches": list}
    """
    matched, total, mismatches = _compare(
        _normalize(expected), _normalize(actual), key, precision
    )

    # a key with zero scorable leaves counts as fully accurate
    accuracy = flt((matched / total) * 100, 2) if total else 100.0

    return {
        "key": key,
        "matched": matched,
        "total": total,
        "accuracy": accuracy,
        "mismatches": mismatches,
    }
+
+
def score_response(
    expected_fields: list[dict],
    actual: dict,
    *,
    weights: dict[str, float] | None = None,
    precision: int = 2,
) -> dict:
    """Score an AI response against expected fields, key by key.

    Args:
        expected_fields: List of {"key": str, "expected_json": str|dict} rows
            from the Dataset child table.
        actual: The full AI response dict.
        weights: {key_name: weight} from Settings; unlisted keys weigh 1.
        precision: Decimal precision for numeric comparisons.

    Returns:
        {"overall_accuracy": float, "details": list[dict]}
        where each detail is the output of score_key().
    """
    weights = weights or {}
    details = []
    matched_sum = 0.0
    total_sum = 0.0

    for field in expected_fields:
        key = field["key"]
        expected = field["expected_json"]

        # child-table values may be stored as JSON strings
        if isinstance(expected, str):
            expected = frappe.parse_json(expected)

        detail = score_key(
            expected,
            actual.get(key) if isinstance(actual, dict) else None,
            key,
            precision,
        )
        details.append(detail)

        weight = weights.get(key, 1.0)
        matched_sum += detail["matched"] * weight
        total_sum += detail["total"] * weight

    overall = flt((matched_sum / total_sum) * 100, 2) if total_sum else 0.0

    return {
        "overall_accuracy": overall,
        "details": details,
    }
diff --git a/transaction_parser/patches.txt b/transaction_parser/patches.txt
index 27002f4..cad4a73 100644
--- a/transaction_parser/patches.txt
+++ b/transaction_parser/patches.txt
@@ -1,8 +1,13 @@
[pre_model_sync]
# Patches added in this section will be executed before doctypes are migrated
# Read docs to understand patches: https://frappeframework.com/docs/v14/user/en/database-migrations
+transaction_parser.patches.rename_gemini_models
+transaction_parser.patches.remove_dataset_file_field
[post_model_sync]
# Patches added in this section will be executed after doctypes are migrated
execute:from transaction_parser.install import after_install; after_install() #2
transaction_parser.patches.set_default_pdf_processor #1
+transaction_parser.patches.recalculate_accuracy
+transaction_parser.patches.populate_dataset_files_table
+transaction_parser.patches.enable_one_document_per_communication
diff --git a/transaction_parser/patches/enable_one_document_per_communication.py b/transaction_parser/patches/enable_one_document_per_communication.py
new file mode 100644
index 0000000..103b8b8
--- /dev/null
+++ b/transaction_parser/patches/enable_one_document_per_communication.py
@@ -0,0 +1,7 @@
+import frappe
+
+
def execute():
    """Enable processing one document per communication in the single
    Transaction Parser Settings doctype."""
    doctype = "Transaction Parser Settings"
    fieldname = "process_one_document_per_communication"
    frappe.db.set_single_value(doctype, fieldname, 1)
diff --git a/transaction_parser/patches/populate_dataset_files_table.py b/transaction_parser/patches/populate_dataset_files_table.py
new file mode 100644
index 0000000..07a415f
--- /dev/null
+++ b/transaction_parser/patches/populate_dataset_files_table.py
@@ -0,0 +1,52 @@
+"""
+Populate the new ``files`` child table on Parser Benchmark Dataset.
+
+After model_sync creates the ``Parser Benchmark Dataset File`` child table,
+this patch reads the File documents that were previously attached (by the
+``remove_dataset_file_field`` pre_model_sync patch) and inserts them as child
+rows so the new child-table based workflow works seamlessly.
+"""
+
+import frappe
+
+
def execute():
    """Backfill the ``files`` child table on every Parser Benchmark Dataset
    from the File docs attached to it, and refresh ``is_multiple_files``."""
    for ds in frappe.get_all("Parser Benchmark Dataset", fields=["name"]):
        # Already migrated — child rows exist for this dataset
        if frappe.db.count("Parser Benchmark Dataset File", {"parent": ds.name}):
            continue

        # File docs previously attached to this dataset
        attachments = frappe.get_all(
            "File",
            filters={
                "attached_to_doctype": "Parser Benchmark Dataset",
                "attached_to_name": ds.name,
            },
            fields=["file_url", "file_type"],
        )

        if not attachments:
            continue

        for position, attachment in enumerate(attachments, 1):
            row = frappe.new_doc("Parser Benchmark Dataset File")
            row.update(
                {
                    "parent": ds.name,
                    "parenttype": "Parser Benchmark Dataset",
                    "parentfield": "files",
                    "idx": position,
                    "file": attachment.file_url,
                    "file_type": attachment.file_type or "",
                }
            )
            row.db_insert()

        # Keep the multi-file flag in sync with the number of rows created
        frappe.db.set_value(
            "Parser Benchmark Dataset",
            ds.name,
            "is_multiple_files",
            1 if len(attachments) > 1 else 0,
        )
diff --git a/transaction_parser/patches/recalculate_accuracy.py b/transaction_parser/patches/recalculate_accuracy.py
new file mode 100644
index 0000000..f2985fb
--- /dev/null
+++ b/transaction_parser/patches/recalculate_accuracy.py
@@ -0,0 +1,96 @@
+import frappe
+from frappe.utils import cint
+
+
def execute():
    """Enqueue recalculation of accuracy scores for every completed
    Parser Benchmark Log that has a stored AI response."""
    names = frappe.get_all(
        "Parser Benchmark Log",
        filters={"status": "Completed", "ai_response": ["is", "set"]},
        pluck="name",
    )

    if names:
        frappe.enqueue(
            recalculate_accuracy,
            log_names=names,
            queue="long",
            timeout=3600,
        )
+
+
def recalculate_accuracy(log_names: list[str]):
    """Recalculate accuracy scores for the given Parser Benchmark Logs.

    Settings are read once and datasets via the document cache to avoid
    repeated DB reads; commits happen every 100 logs so the transaction
    never grows unbounded. Failures are logged per-log and do not abort
    the rest of the batch.
    """
    from transaction_parser.parser_benchmark.scorer import score_response

    settings = frappe.get_cached_doc("Parser Benchmark Settings")
    weights = {row.key: row.weight for row in (settings.key_weights or [])}
    precision = cint(frappe.db.get_default("float_precision")) or 2

    batch_size = 100

    for position, log_name in enumerate(log_names, start=1):
        try:
            _recalculate_log(log_name, weights, precision, score_response)
        except Exception:
            frappe.log_error(
                title=f"Recalculate Accuracy: {log_name}",
                message=frappe.get_traceback(),
            )

        if position % batch_size == 0:
            frappe.db.commit()  # nosemgrep

    frappe.db.commit()  # nosemgrep
+
+
def _recalculate_log(log_name, weights, precision, score_response):
    """Rescore one log against its dataset's expected fields and persist
    the new accuracy score plus freshly computed per-key score details."""
    log = frappe.get_doc("Parser Benchmark Log", log_name)
    if not log.ai_response:
        return

    dataset = frappe.get_cached_doc("Parser Benchmark Dataset", log.dataset)
    if not dataset.expected_fields:
        return

    result = score_response(
        expected_fields=[
            {"key": row.key, "expected_json": row.expected_json}
            for row in dataset.expected_fields
        ],
        actual=frappe.parse_json(log.ai_response),
        weights=weights,
        precision=precision,
    )

    # replace old score details with the rescored ones
    log.score_details = []

    for detail in result["details"]:
        mismatches = detail["mismatches"]
        log.append(
            "score_details",
            {
                "key": detail["key"],
                "matched": detail["matched"],
                "total": detail["total"],
                "accuracy": detail["accuracy"],
                "mismatches": frappe.as_json(mismatches, indent=2)
                if mismatches
                else None,
            },
        )

    log.accuracy_score = result["overall_accuracy"]
    log.flags.ignore_validate = True
    log.save(ignore_permissions=True)
diff --git a/transaction_parser/patches/remove_dataset_file_field.py b/transaction_parser/patches/remove_dataset_file_field.py
new file mode 100644
index 0000000..87e2b48
--- /dev/null
+++ b/transaction_parser/patches/remove_dataset_file_field.py
@@ -0,0 +1,73 @@
+"""
+Migrate `file` field data on Parser Benchmark Dataset to Frappe File attachments.
+
+Before the `file` column is dropped (pre_model_sync), ensure every Dataset that
+had a file URL stored in the `file` field has a corresponding File doc properly
+linked via `attached_to_doctype` / `attached_to_name`.
+"""
+
+import frappe
+
+
def execute():
    """Ensure every dataset with a URL in the legacy ``file`` column has a
    File doc attached to it, before the column is dropped."""
    if not frappe.db.has_column("Parser Benchmark Dataset", "file"):
        return

    for ds in frappe.get_all(
        "Parser Benchmark Dataset",
        filters={"file": ("is", "set")},
        fields=["name", "file"],
    ):
        file_url = ds.file
        if not file_url:
            continue

        # Nothing to do when a File doc is already attached to this dataset
        if frappe.db.exists(
            "File",
            {
                "file_url": file_url,
                "attached_to_doctype": "Parser Benchmark Dataset",
                "attached_to_name": ds.name,
            },
        ):
            continue

        # Look for any File doc with the same URL
        candidate = frappe.db.get_value(
            "File",
            {"file_url": file_url},
            ["name", "attached_to_doctype", "attached_to_name"],
            as_dict=True,
        )

        if candidate and not candidate.attached_to_doctype:
            # Orphan File doc — link it to this dataset
            frappe.db.set_value(
                "File",
                candidate.name,
                {
                    "attached_to_doctype": "Parser Benchmark Dataset",
                    "attached_to_name": ds.name,
                },
            )
        else:
            # Either no File doc exists for this URL, or it is attached to
            # something else — create a fresh attachment for the dataset
            _create_attachment(ds.name, file_url)
+
+
def _create_attachment(dataset_name: str, file_url: str):
    """Insert a File doc pointing at ``file_url``, attached to the dataset."""
    attachment = frappe.new_doc("File")
    attachment.file_url = file_url
    attachment.attached_to_doctype = "Parser Benchmark Dataset"
    attachment.attached_to_name = dataset_name
    attachment.insert(ignore_permissions=True)
diff --git a/transaction_parser/patches/rename_gemini_models.py b/transaction_parser/patches/rename_gemini_models.py
new file mode 100644
index 0000000..a168455
--- /dev/null
+++ b/transaction_parser/patches/rename_gemini_models.py
@@ -0,0 +1,16 @@
+import frappe
+
# Old AI model labels → their renamed Gemini 2.5 variants
RENAME_MAP = {
    "Google Gemini Pro": "Google Gemini Pro-2.5",
    "Google Gemini Flash": "Google Gemini Flash-2.5",
}

DOCTYPE = "Transaction Parser Settings"
FIELD = "default_ai_model"


def execute():
    """Migrate the stored default AI model to its renamed Gemini variant."""
    current = frappe.db.get_single_value(DOCTYPE, FIELD)
    renamed = RENAME_MAP.get(current)

    if renamed:
        frappe.db.set_single_value(DOCTYPE, FIELD, renamed)
diff --git a/transaction_parser/public/js/transaction_parser_dialog.js b/transaction_parser/public/js/transaction_parser_dialog.js
index 1382721..919052f 100644
--- a/transaction_parser/public/js/transaction_parser_dialog.js
+++ b/transaction_parser/public/js/transaction_parser_dialog.js
@@ -52,6 +52,17 @@ async function create_transaction_parser_dialog(transaction_type, list_view) {
default: get_default_country(),
reqd: 1,
},
+ {
+ fieldtype: "Section Break",
+ },
+ {
+ fieldname: "company",
+ label: __("Company"),
+ fieldtype: "Link",
+ options: "Company",
+ default: frappe.defaults.get_user_default("Company"),
+ reqd: 1,
+ },
],
primary_action_label: __("Submit"),
primary_action(values) {
diff --git a/transaction_parser/transaction_parser/__init__.py b/transaction_parser/transaction_parser/__init__.py
index abd27da..c3dcc68 100644
--- a/transaction_parser/transaction_parser/__init__.py
+++ b/transaction_parser/transaction_parser/__init__.py
@@ -15,7 +15,7 @@
@frappe.whitelist()
-def parse(transaction, country, file_url, ai_model=None, page_limit=None):
+def parse(transaction, country, file_url, ai_model=None, page_limit=None, company=None):
is_enabled()
frappe.has_permission(TRANSACTION_MAP[transaction], "create", throw=True)
@@ -24,9 +24,10 @@ def parse(transaction, country, file_url, ai_model=None, page_limit=None):
_parse,
country=cstr(country),
transaction=cstr(transaction),
- file_url=cstr(file_url),
+ file_urls=cstr(file_url),
ai_model=cstr(ai_model),
page_limit=cint(page_limit),
+ company=cstr(company) if company else None,
queue="long",
now=frappe.conf.developer_mode,
)
@@ -35,54 +36,106 @@ def parse(transaction, country, file_url, ai_model=None, page_limit=None):
def _parse(
country,
transaction,
- file_url,
+ file_urls,
ai_model=None,
page_limit=None,
user=None,
party=None,
company=None,
+ communication_name=None,
):
+ file = None
+
try:
- file = None
- filename = file_url.split("/")[-1]
+ if (
+ isinstance(file_urls, str)
+ and file_urls.startswith("[")
+ and file_urls.endswith("]")
+ ):
+ file_urls = frappe.parse_json(file_urls)
+
+ elif isinstance(file_urls, str):
+ file_urls = [file_urls]
- file = frappe.get_last_doc("File", filters={"file_url": file_url})
- filename = file.file_name
+ file_names = frappe.get_list(
+ "File",
+ filters={"file_url": ("in", file_urls)},
+ fields=["name", "file_type"],
+ order_by="creation desc",
+ group_by="file_url",
+ )
+
+ # xlsx/xls first, then pdf, then csv. If no xlsx/xls, csv takes its place.
+ file_types = {(f.file_type or "").lower() for f in file_names}
+ has_spreadsheet = file_types & {"xlsx", "xls"}
+
+ if has_spreadsheet:
+ FILE_TYPE_PRIORITY = {"xlsx": 0, "xls": 0, "pdf": 1, "csv": 2}
+ else:
+ FILE_TYPE_PRIORITY = {"csv": 0, "pdf": 1}
+
+ file_names.sort(
+ key=lambda f: FILE_TYPE_PRIORITY.get((f.file_type or "").lower(), 99)
+ )
+
+ files = []
+ for file_name in file_names:
+ file = frappe.get_doc("File", file_name)
+ files.append(file)
controller = get_controller(country, transaction)(party=party, company=company)
- doc = controller.generate(file, ai_model, page_limit)
+ doc = controller.generate(files, ai_model, page_limit)
+ filenames = (
+ ", ".join([f.file_name for f in files])
+ if len(files) > 1
+ else files[0].file_name
+ )
notification = {
"document_type": TRANSACTION_MAP[transaction],
"document_name": doc.name,
"subject": _("{0} {1} generated from {2}").format(
_(TRANSACTION_MAP[transaction]),
doc.name,
- filename,
+ filenames,
),
}
except Exception as e:
notification = None
+ reference_doctype = "Communication" if communication_name else "File"
+ reference_docname = (
+ communication_name
+ if communication_name
+ else (files[0].name if files else None)
+ )
if (
isinstance(e, frappe.DuplicateEntryError)
and frappe.flags.skip_duplicate_error
):
+ subject = _("Duplicate {0} found for {1}").format(
+ _(TRANSACTION_MAP[transaction]),
+ f"{reference_doctype} {reference_docname}",
+ )
+
notification = {
- "document_type": "File",
- "document_name": file.name if file else filename,
- "subject": _("Duplicate entry found for {0}").format(filename),
+ "document_type": reference_doctype,
+ "document_name": reference_docname,
+ "subject": subject,
"message": str(e),
}
- return
- error_log = frappe.log_error(
- "Transaction Parser API Error",
- reference_doctype="File",
- reference_name=file.name if file else filename,
+ if not (error_log := getattr(e, "error_log", None)):
+ error_log = frappe.log_error(
+ "Transaction Parser Error",
+ reference_doctype=reference_doctype,
+ reference_name=reference_docname,
+ )
+
+ message = _("Failed to generate {0} from {1}").format(
+ TRANSACTION_MAP[transaction], f"{reference_doctype} {reference_docname}"
)
- message = _("Failed to generate {0} from {1}").format(_(transaction), filename)
notification = {
"document_type": error_log.doctype,
@@ -91,7 +144,7 @@ def _parse(
"message": str(e),
}
- email_failure(user, message, str(e), file_url)
+ email_failure(user, message, str(e), file_urls)
finally:
if notification:
diff --git a/transaction_parser/transaction_parser/ai_integration/models.py b/transaction_parser/transaction_parser/ai_integration/models.py
index 9c6e3be..65dfd58 100644
--- a/transaction_parser/transaction_parser/ai_integration/models.py
+++ b/transaction_parser/transaction_parser/ai_integration/models.py
@@ -120,6 +120,6 @@ class GeminiFlash(Model):
"OpenAI gpt-4o-mini": OpenAIGPT4oMini(),
"OpenAI gpt-5": OpenAIGPT5(),
"OpenAI gpt-5-mini": OpenAIGPT5Mini(),
- "Google Gemini Pro": GeminiPro(),
- "Google Gemini Flash": GeminiFlash(),
+ "Google Gemini Pro-2.5": GeminiPro(),
+ "Google Gemini Flash-2.5": GeminiFlash(),
}
diff --git a/transaction_parser/transaction_parser/ai_integration/parser.py b/transaction_parser/transaction_parser/ai_integration/parser.py
index 6db6d3f..f07a7b9 100644
--- a/transaction_parser/transaction_parser/ai_integration/parser.py
+++ b/transaction_parser/transaction_parser/ai_integration/parser.py
@@ -23,6 +23,7 @@ def __init__(self, model: str | None = None, settings=None):
is_enabled(self.settings)
self.model = self._get_model(model)
+ self.ai_response = {}
if not self.model:
frappe.throw(_(f"AI Model: {model} not found"))
@@ -35,17 +36,30 @@ def parse(
document_schema: dict,
document_data: str,
file_doc_name: str | None = None,
+ company: str | None = None,
) -> dict:
- messages = self._build_messages(document_type, document_schema, document_data)
- response = self.send_message(messages=messages, file_doc_name=file_doc_name)
+ messages = self._build_messages(
+ document_type, document_schema, document_data, company
+ )
+
+ response = self.send_message(
+ messages=messages,
+ file_doc_name=file_doc_name,
+ )
+
return self.get_content(response)
def _build_messages(
- self, document_type: str, document_schema: dict, document_data: str
+ self,
+ document_type: str,
+ document_schema: dict,
+ document_data: str,
+ company: str | None = None,
) -> tuple:
"""Build the message structure for AI API call."""
+ company_info = self._get_company_info(company) if company else ""
system_prompt = get_system_prompt(document_schema)
- user_prompt = get_user_prompt(document_type, document_data)
+ user_prompt = get_user_prompt(document_type, document_data, company_info)
return (
{
@@ -58,6 +72,23 @@ def _build_messages(
},
)
+ @staticmethod
+ def _get_company_info(company: str) -> str:
+ """Build a company context string with name and address if available."""
+ from frappe.contacts.doctype.address.address import get_company_address
+ from frappe.utils import strip_html
+
+ info = f"Company: {company}"
+
+ address = get_company_address(company)
+ if address and address.company_address_display:
+ address_text = strip_html(address.company_address_display).strip()
+
+ if address_text:
+ info += f"\nLocated at: {address_text}"
+
+ return info
+
def send_message(self, messages: tuple, file_doc_name: str | None = None) -> dict:
"""Send messages to AI API and handle the response."""
log = self._create_log_entry(file_doc_name)
@@ -84,13 +115,14 @@ def send_message(self, messages: tuple, file_doc_name: str | None = None) -> dic
def _create_log_entry(self, file_doc_name: str | None) -> frappe._dict:
"""Create a log entry for the API call."""
log = frappe._dict(url=self.model.base_url)
- if file_doc_name:
- log.update(
- {
- "reference_doctype": "File",
- "reference_name": file_doc_name,
- }
- )
+
+ log.update(
+ {
+ "reference_doctype": "File",
+ "reference_name": file_doc_name,
+ }
+ )
+
return log
def _make_api_call(self, messages: tuple) -> Any:
diff --git a/transaction_parser/transaction_parser/ai_integration/prompts.py b/transaction_parser/transaction_parser/ai_integration/prompts.py
index 95cf2e0..9d63d50 100644
--- a/transaction_parser/transaction_parser/ai_integration/prompts.py
+++ b/transaction_parser/transaction_parser/ai_integration/prompts.py
@@ -3,6 +3,8 @@
# Mapping of output document types to their corresponding input document types
INPUT_DOCUMENTS = {"Sales Order": "Purchase Order", "Purchase Invoice": "Sales Invoice"}
+SELLER_DOCUMENT_TYPES = {"Sales Order"}
+
def get_system_prompt(document_schema: dict) -> str:
return f"""You are a JSON data extraction and validation expert for your company's ERP platform.
@@ -34,10 +36,27 @@ def get_system_prompt(document_schema: dict) -> str:
{document_schema}"""
-def get_user_prompt(document_type: str, document_data: str) -> str:
+def get_user_prompt(
+ document_type: str, document_data: str, company_info: str = ""
+) -> str:
input_doc_type = INPUT_DOCUMENTS.get(document_type, "document")
- return f"""Generate {document_type} for given {input_doc_type} according to above JSON schema.
+ company_context = ""
+ if company_info:
+ if document_type in SELLER_DOCUMENT_TYPES:
+ role_hint = "Use this to correctly identify the company as the seller/vendor and the other party as the customer/buyer."
+ else:
+ role_hint = "Use this to correctly identify the company as the buyer/recipient and the other party as the vendor/supplier."
+
+ company_context = f"""
+
+This {input_doc_type} is received by the following company:
+{company_info}
+
+{role_hint}
+"""
+
+ return f"""Generate {document_type} for the given {input_doc_type} according to above JSON schema.{company_context}
Document data is given below:
{document_data}"""
diff --git a/transaction_parser/transaction_parser/controllers/transaction.py b/transaction_parser/transaction_parser/controllers/transaction.py
index 05a59c0..3ee16b4 100644
--- a/transaction_parser/transaction_parser/controllers/transaction.py
+++ b/transaction_parser/transaction_parser/controllers/transaction.py
@@ -2,11 +2,16 @@
import frappe
from erpnext.setup.utils import get_exchange_rate
from erpnext.stock.get_item_details import get_item_details
+from httpx import HTTPError
from rapidfuzz import fuzz, process
+from transaction_parser.exceptions import FileProcessingError
from transaction_parser.transaction_parser.ai_integration.parser import AIParser
from transaction_parser.transaction_parser.utils import to_dict
from transaction_parser.transaction_parser.utils.file_processor import FileProcessor
+from transaction_parser.transaction_parser.utils.response_merger import (
+ ResponseMerger,
+)
class Transaction:
@@ -29,13 +34,24 @@ def __init__(
self.company = company
def generate(
- self, file, ai_model: str | None = None, page_limit: int | None = None
+ self,
+ files,
+ ai_model: str | None = None,
+ page_limit: int | None = None,
):
self.initialize()
- self.file = file
+ if isinstance(files, str):
+ files = [files]
+
+ self.files = files
self.ai_model = ai_model
self.data = self._parse_file_content(ai_model, page_limit)
+
+ return self.create_document()
+
+ def create_document(self):
+ """Create, populate, and insert the transaction document."""
self.doc = frappe.get_doc({"doctype": self.DOCTYPE})
self.doc.is_created_by_transaction_parser = 1
@@ -49,7 +65,7 @@ def generate(
def initialize(self) -> None:
# file processing
- self.file = None
+ self.files = None
# output schema
self.schema = None
@@ -72,20 +88,76 @@ def initialize(self) -> None:
def _parse_file_content(
self, ai_model: str | None = None, page_limit: int | None = None
) -> dict:
- content = FileProcessor().get_content(self.file, page_limit)
- schema = self.get_schema()
-
- return AIParser(ai_model, self.settings).parse(
- document_type=self.DOCTYPE,
- document_schema=schema,
- document_data=content,
- file_doc_name=self.file.name,
+ if len(self.files) > 1:
+ return self._parse_multiple_files(ai_model, page_limit)
+
+ return self._parse_single_file(self.files[0], ai_model, page_limit)
+
+ def _parse_single_file(
+ self,
+ file,
+ ai_model: str | None = None,
+ page_limit: int | None = None,
+ ) -> dict:
+ try:
+ content = FileProcessor().get_content(file, page_limit)
+ schema = self.get_schema()
+
+ return AIParser(ai_model, self.settings).parse(
+ document_type=self.DOCTYPE,
+ document_schema=schema,
+ document_data=content,
+ file_doc_name=file.name,
+ company=self.company,
+ )
+
+ except FileProcessingError as e:
+ error_log = frappe.log_error(
+ title="File processing error in Transaction Parser",
+ reference_doctype="File",
+ reference_name=file.name,
+ )
+ e.error_log = error_log
+ raise e
+
+ except HTTPError as e:
+ error_log = frappe.log_error(
+ title="Transaction Parser API error",
+ reference_doctype="File",
+ reference_name=file.name,
+ )
+ e.error_log = error_log
+ raise e
+
+ def _parse_multiple_files(
+ self, ai_model: str | None = None, page_limit: int | None = None
+ ) -> dict:
+ response = self._parse_single_file(self.files[0], ai_model, page_limit)
+ merger = ResponseMerger(
+ response,
+ schema=self.get_schema(),
+ match_keys=self.get_match_keys(),
)
+ for file in self.files[1:]:
+ if merger.is_complete():
+ break
+
+ new_response = self._parse_single_file(file, ai_model, page_limit)
+ merger.merge(new_response)
+
+ return merger.response
+
###################################
########## Output Schema ##########
###################################
+ def get_match_keys(self) -> dict[str, list[str]]:
+        """Map each list-field name to the key fields used to match its items during merge."""
+ return {
+ "item_list": ["party_item_code", "quantity", "rate", "description"],
+ }
+
def get_schema(self) -> dict:
if not self.schema:
self.schema = self._get_schema()
@@ -271,9 +343,12 @@ def _set_flags(self) -> None:
self.doc.flags.ignore_links = True
def _attach_file(self) -> None:
- self.file.attached_to_doctype = self.DOCTYPE
- self.file.attached_to_name = self.doc.name
- self.file.save()
+ files_to_attach = self.files if isinstance(self.files, list) else [self.files]
+
+ for file_doc in files_to_attach:
+ file_doc.attached_to_doctype = self.DOCTYPE
+ file_doc.attached_to_name = self.doc.name
+ file_doc.save()
def set_exchange_rate(self, from_currency, date, args):
company_currency = erpnext.get_company_currency(self.doc.company)
diff --git a/transaction_parser/transaction_parser/doctype/transaction_parser_party_email/transaction_parser_party_email.json b/transaction_parser/transaction_parser/doctype/transaction_parser_party_email/transaction_parser_party_email.json
index c8aab80..72e3372 100644
--- a/transaction_parser/transaction_parser/doctype/transaction_parser_party_email/transaction_parser_party_email.json
+++ b/transaction_parser/transaction_parser/doctype/transaction_parser_party_email/transaction_parser_party_email.json
@@ -50,4 +50,4 @@
"sort_field": "modified",
"sort_order": "DESC",
"states": []
-}
\ No newline at end of file
+}
diff --git a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json
index 53dc83c..d32ab8b 100644
--- a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json
+++ b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.json
@@ -16,6 +16,7 @@
"email_configuration_section",
"parse_incoming_emails",
"parse_party_emails",
+ "process_one_document_per_communication",
"incoming_email_accounts",
"party_emails",
"custom_fields_tab",
@@ -91,7 +92,7 @@
"fieldtype": "Select",
"label": "Default AI Model",
"mandatory_depends_on": "eval: doc.enabled",
- "options": "DeepSeek Chat\nDeepSeek Reasoner\nOpenAI gpt-4o\nOpenAI gpt-4o-mini\nOpenAI gpt-5\nOpenAI gpt-5-mini\nGoogle Gemini Pro\nGoogle Gemini Flash"
+ "options": "DeepSeek Chat\nDeepSeek Reasoner\nOpenAI gpt-4o\nOpenAI gpt-4o-mini\nOpenAI gpt-5\nOpenAI gpt-5-mini\nGoogle Gemini Pro-2.5\nGoogle Gemini Flash-2.5"
},
{
"default": "OCRMyPDF",
@@ -161,12 +162,20 @@
"fieldname": "transaction_configurations_section",
"fieldtype": "Section Break",
"label": "Transaction Configurations"
+ },
+ {
+ "default": "0",
+ "depends_on": "eval: doc.parse_incoming_emails",
+ "description": "All attachments from a communication are combined to create a single document",
+ "fieldname": "process_one_document_per_communication",
+ "fieldtype": "Check",
+ "label": "Process One Document Per Communication"
}
],
"index_web_pages_for_search": 1,
"issingle": 1,
"links": [],
- "modified": "2026-03-14 13:35:17.150533",
+ "modified": "2026-04-01 15:29:35.262995",
"modified_by": "Administrator",
"module": "Transaction Parser",
"name": "Transaction Parser Settings",
@@ -187,4 +196,4 @@
"sort_field": "modified",
"sort_order": "DESC",
"states": []
-}
+}
\ No newline at end of file
diff --git a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
index 95b4c43..8ff3911 100644
--- a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
+++ b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
@@ -12,6 +12,50 @@
class TransactionParserSettings(Document):
+ # begin: auto-generated types
+ # This code is auto-generated. Do not modify anything in this block.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from frappe.types import DF
+
+ from transaction_parser.transaction_parser.doctype.transaction_parser_api_key_item.transaction_parser_api_key_item import (
+ TransactionParserAPIKeyItem,
+ )
+ from transaction_parser.transaction_parser.doctype.transaction_parser_email_account.transaction_parser_email_account import (
+ TransactionParserEmailAccount,
+ )
+ from transaction_parser.transaction_parser.doctype.transaction_parser_party_email.transaction_parser_party_email import (
+ TransactionParserPartyEmail,
+ )
+
+ address_schema: DF.JSON | None
+ api_keys: DF.Table[TransactionParserAPIKeyItem]
+ base_schema: DF.JSON | None
+ default_ai_model: DF.Literal[
+ "DeepSeek Chat",
+ "DeepSeek Reasoner",
+ "OpenAI gpt-4o",
+ "OpenAI gpt-4o-mini",
+ "OpenAI gpt-5",
+ "OpenAI gpt-5-mini",
+ "Google Gemini Pro-2.5",
+ "Google Gemini Flash-2.5",
+ ]
+ enabled: DF.Check
+ incoming_email_accounts: DF.Table[TransactionParserEmailAccount]
+ invoice_lookback_count: DF.Int
+ item_schema: DF.JSON | None
+ parse_incoming_emails: DF.Check
+ parse_party_emails: DF.Check
+ party_emails: DF.Table[TransactionParserPartyEmail]
+ party_schema: DF.JSON | None
+ pdf_processor: DF.Literal["OCRMyPDF", "Docling"]
+ process_one_document_per_communication: DF.Check
+ tax_schema: DF.JSON | None
+
+ # end: auto-generated types
# TODO: can we check API creds?
def validate(self):
self.validate_lookback_count()
diff --git a/transaction_parser/transaction_parser/overrides/communication.py b/transaction_parser/transaction_parser/overrides/communication.py
index 679ae95..47be988 100644
--- a/transaction_parser/transaction_parser/overrides/communication.py
+++ b/transaction_parser/transaction_parser/overrides/communication.py
@@ -13,6 +13,18 @@ def on_update(doc, method=None):
if not (settings.enabled and settings.parse_incoming_emails):
return
+ matched_account = next(
+ (
+ row
+ for row in settings.incoming_email_accounts
+ if row.to_email in doc.recipients
+ ),
+ None,
+ )
+
+ if not matched_account:
+ return
+
if settings.parse_party_emails:
matched_party_config = next(
(row for row in settings.party_emails if row.party_email == doc.sender),
@@ -41,21 +53,10 @@ def on_update(doc, method=None):
settings,
default_user,
matched_party_config.party,
+ matched_account.company,
)
return
- matched_account = next(
- (
- row
- for row in settings.incoming_email_accounts
- if row.to_email in doc.recipients
- ),
- None,
- )
-
- if not matched_account:
- return
-
# Attachments are not available when the Communication doc is created.
# Next time the doc is updated, we will check for attachments,
# and update the flag `is_processed_by_transaction_parser` accordingly.
@@ -98,19 +99,22 @@ def _process_attachments(
else:
country = frappe.db.get_value("Company", company, "country")
- sorted_attachments = sorted(
- attachments,
- key=lambda attachment: {"xlsx": 0, "csv": 1, "pdf": 2}.get(
- attachment.file_url.split(".")[-1].lower(), 3
- ),
- )
+ supported_extensions = {"pdf", "xlsx", "xls", "csv"}
+ filtered_attachments = [
+ attachment
+ for attachment in attachments
+ if attachment.file_url.split(".")[-1].lower() in supported_extensions
+ ]
+
+ if not filtered_attachments:
+ return
frappe.enqueue(
"transaction_parser.transaction_parser.overrides.communication._parse_attachments",
doc=doc,
country=country,
transaction_type=transaction_type,
- attachments=sorted_attachments,
+ attachments=filtered_attachments,
ai_model=settings.default_ai_model,
user=user,
party=party,
@@ -122,16 +126,32 @@ def _process_attachments(
def _parse_attachments(
doc, country, transaction_type, attachments, ai_model, user, party, company
):
- for attachment in attachments:
+ settings = frappe.get_cached_doc("Transaction Parser Settings")
+
+ if settings.process_one_document_per_communication:
+ file_urls = [attachment.file_url for attachment in attachments]
_parse(
country=country,
transaction=transaction_type,
- file_url=attachment.file_url,
+ file_urls=file_urls,
ai_model=ai_model,
user=user,
party=party,
company=company,
+ communication_name=doc.name,
)
frappe.db.commit()
+ else:
+ for attachment in attachments:
+ _parse(
+ country=country,
+ transaction=transaction_type,
+ file_urls=attachment.file_url,
+ ai_model=ai_model,
+ user=user,
+ party=party,
+ company=company,
+ )
+ frappe.db.commit()
doc.db_set("is_processed_by_transaction_parser", 1)
diff --git a/transaction_parser/transaction_parser/utils/file_processor.py b/transaction_parser/transaction_parser/utils/file_processor.py
index 3cebb9b..7ad4fcc 100644
--- a/transaction_parser/transaction_parser/utils/file_processor.py
+++ b/transaction_parser/transaction_parser/utils/file_processor.py
@@ -9,6 +9,7 @@
read_xlsx_file_from_attached_file,
)
+from transaction_parser.exceptions import FileProcessingError
from transaction_parser.transaction_parser.utils.pdf_processor import (
PDFProcessor,
get_pdf_processor,
@@ -26,11 +27,15 @@ def get_content(
page_limit: int | None = None,
pdf_processor: PDFProcessor | None = None,
) -> str | None:
- if doc.file_type == "PDF":
- return self.process_pdf(doc, page_limit, pdf_processor)
+ try:
+ if doc.file_type == "PDF":
+ return self.process_pdf(doc, page_limit, pdf_processor)
- if doc.file_type in ("CSV", "XLSX", "XLS"):
- return self.process_spreadsheet(doc)
+ if doc.file_type in ("CSV", "XLSX", "XLS"):
+ return self.process_spreadsheet(doc)
+
+ except Exception as e:
+            raise FileProcessingError(str(e)) from e
frappe.throw(
title=_("Unsupported File Type"),
diff --git a/transaction_parser/transaction_parser/utils/pdf_processor.py b/transaction_parser/transaction_parser/utils/pdf_processor.py
index 8d0df88..cdbddb2 100644
--- a/transaction_parser/transaction_parser/utils/pdf_processor.py
+++ b/transaction_parser/transaction_parser/utils/pdf_processor.py
@@ -221,3 +221,9 @@ def get_pdf_processor(name: str | None = None) -> PDFProcessor:
)
return frappe.get_attr(class_path)()
+
+
+def get_available_pdf_processors() -> list[str]:
+ """Return names of all registered PDF processors from hooks."""
+ processors = frappe.get_hooks("pdf_processors") or {}
+ return list(processors.keys())
diff --git a/transaction_parser/transaction_parser/utils/response_merger.py b/transaction_parser/transaction_parser/utils/response_merger.py
new file mode 100644
index 0000000..826750c
--- /dev/null
+++ b/transaction_parser/transaction_parser/utils/response_merger.py
@@ -0,0 +1,234 @@
+from dataclasses import dataclass
+from typing import Any
+
+from frappe import _dict
+
+
+@dataclass
+class FieldType:
+ """Base class for field types in schema."""
+
+ required: bool
+
+ def is_empty(self, value: Any) -> bool:
+ return (
+ value is None
+ or value == ""
+ or (isinstance(value, list | dict) and len(value) == 0)
+ )
+
+
+@dataclass
+class PrimitiveField(FieldType):
+ """Represents a primitive field (string, number, date, etc.)."""
+
+ pass
+
+
+@dataclass
+class ObjectField(FieldType):
+ """Represents a nested object field with child fields."""
+
+ children: dict[str, FieldType]
+
+
+@dataclass
+class ListField(FieldType):
+ """Represents a list/array field."""
+
+ item_type: FieldType # Type of items in the list
+
+
+class SchemaParser:
+ """Parses a schema dict into a structured FieldType hierarchy."""
+
+ def parse(self, schema: dict) -> dict[str, FieldType]:
+ """Parse schema dict into field name -> FieldType mapping."""
+ fields = {}
+
+ for key, value in schema.items():
+ fields[key] = self._parse_field(value)
+
+ return fields
+
+ def _parse_field(self, schema_value: Any) -> FieldType:
+ """Determine and return the appropriate FieldType for a schema value."""
+ if isinstance(schema_value, list):
+ if not schema_value:
+ return ListField(required=True, item_type=PrimitiveField(required=True))
+
+ item_schema = schema_value[0]
+
+ if isinstance(item_schema, dict):
+ item_fields = self.parse(item_schema)
+ return ListField(
+ required=True,
+ item_type=ObjectField(required=True, children=item_fields),
+ )
+ else:
+ return ListField(
+ required=True,
+ item_type=PrimitiveField(required=True),
+ )
+
+ elif isinstance(schema_value, dict):
+ children = self.parse(schema_value)
+ return ObjectField(required=True, children=children)
+
+ else:
+ return PrimitiveField(required=True)
+
+
+class ResponseMerger:
+ """Schema-driven merger for AI responses from multiple attachments."""
+
+ def __init__(
+ self,
+ response: dict,
+ schema: dict,
+ match_keys: dict[str, list[str]] | None = None,
+ ):
+ self.response = _dict(response) if isinstance(response, dict) else response
+ self.schema = schema
+ self.match_keys = match_keys or {}
+
+ parser = SchemaParser()
+ self.fields = parser.parse(schema)
+
+ def is_complete(self) -> bool:
+ """Return True if all required fields are filled."""
+ return len(self.get_missing_fields()) == 0
+
+ def get_missing_fields(self) -> list[str]:
+ """Return dot-separated paths of missing required fields."""
+ missing = []
+ self._check_missing_fields(self.fields, self.response, "", missing)
+ return missing
+
+ def _check_missing_fields(
+ self,
+ fields: dict[str, FieldType],
+ data: dict,
+ path_prefix: str,
+ missing: list[str],
+ ) -> None:
+ for key, field_type in fields.items():
+ field_path = f"{path_prefix}.{key}" if path_prefix else key
+ value = data.get(key) if isinstance(data, dict) else None
+
+ if isinstance(field_type, PrimitiveField):
+ if field_type.required and field_type.is_empty(value):
+ missing.append(field_path)
+
+ elif isinstance(field_type, ObjectField):
+ if field_type.required and field_type.is_empty(value):
+ missing.append(field_path)
+ elif value:
+ self._check_missing_fields(
+ field_type.children, value, field_path, missing
+ )
+
+ # List fields are checked during merging, not at top level
+
+ def merge(self, new_response: dict) -> None:
+ """Merge new_response into the existing response."""
+ self._merge_fields(self.fields, self.response, new_response)
+
+ def _merge_fields(
+ self,
+ fields: dict[str, FieldType],
+ target: dict,
+ source: dict,
+ ) -> None:
+ for key, field_type in fields.items():
+ source_value = source.get(key)
+
+ if source_value is None:
+ continue
+
+ if isinstance(field_type, PrimitiveField):
+ self._merge_primitive(target, key, source_value)
+
+ elif isinstance(field_type, ObjectField):
+ self._merge_object(field_type, target, key, source_value)
+
+ elif isinstance(field_type, ListField):
+ self._merge_list(field_type, target, key, source_value)
+
+ def _merge_primitive(self, target: dict, key: str, source_value: Any) -> None:
+        if field_type.is_empty(target.get(key)) and not field_type.is_empty(source_value):
+ target[key] = source_value
+
+ def _merge_object(
+ self,
+ field_type: ObjectField,
+ target: dict,
+ key: str,
+ source_value: dict,
+ ) -> None:
+ if key not in target or target[key] is None:
+ target[key] = {}
+
+ self._merge_fields(field_type.children, target[key], source_value)
+
+ def _merge_list(
+ self,
+ field_type: ListField,
+ target: dict,
+ key: str,
+ source_value: list,
+ ) -> None:
+ target_list = target.get(key, [])
+
+ if not target_list:
+ target[key] = source_value
+ return
+
+ if not source_value:
+ return
+
+ if not isinstance(field_type.item_type, ObjectField):
+ return
+
+ key_fields = self.match_keys.get(key, [])
+ if not key_fields:
+ return
+
+ for target_item in target_list:
+ matched_item = self._find_matching_item(
+ target_item, source_value, key_fields
+ )
+ if matched_item:
+ self._merge_fields(
+ field_type.item_type.children, target_item, matched_item
+ )
+
+ def _find_matching_item(
+ self,
+ target_item: dict,
+ source_items: list[dict],
+ key_fields: list[str],
+ ) -> dict | None:
+        # _items_match enforces the >= 2 matching-key-field threshold to avoid false positives
+ for source_item in source_items:
+ if self._items_match(target_item, source_item, key_fields):
+ return source_item
+
+ return None
+
+ def _items_match(
+ self,
+ item1: dict,
+ item2: dict,
+ key_fields: list[str],
+ ) -> bool:
+ matches = 0
+
+ for field in key_fields:
+ value1 = item1.get(field)
+ value2 = item2.get(field)
+
+ if value1 and value2 and value1 == value2:
+ matches += 1
+
+ return matches >= 2
diff --git a/transaction_parser/uninstall.py b/transaction_parser/uninstall.py
index bb326e2..8196823 100644
--- a/transaction_parser/uninstall.py
+++ b/transaction_parser/uninstall.py
@@ -2,6 +2,8 @@
FIELDS_TO_DELETE = {
"Transaction Parser Settings": ["in_auto_create_supplier"],
+ "Sales Order": ["is_created_by_transaction_parser"],
+ "Purchase Invoice": ["is_created_by_transaction_parser"],
}