diff --git a/api/db/models.py b/api/db/models.py index f76c93b..e0f7b62 100644 --- a/api/db/models.py +++ b/api/db/models.py @@ -1,6 +1,7 @@ from sqlmodel import SQLModel, Field from sqlalchemy import Column, JSON from datetime import datetime +from typing import Optional class Template(SQLModel, table=True): id: int | None = Field(default=None, primary_key=True) @@ -15,4 +16,5 @@ class FormSubmission(SQLModel, table=True): template_id: int input_text: str output_pdf_path: str + needs_review: Optional[dict] = Field(default=None, sa_column=Column(JSON)) created_at: datetime = Field(default_factory=datetime.utcnow) \ No newline at end of file diff --git a/api/routes/forms.py b/api/routes/forms.py index f3430ed..4f4d3f8 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -11,15 +11,24 @@ @router.post("/fill", response_model=FormFillResponse) def fill_form(form: FormFill, db: Session = Depends(get_db)): - if not get_template(db, form.template_id): - raise AppError("Template not found", status_code=404) - + # Fetch the template once — eliminates the previous double-query TOCTOU bug fetched_template = get_template(db, form.template_id) + if not fetched_template: + raise AppError("Template not found", status_code=404) controller = Controller() - path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path) + # fill_form now returns a tuple: (output_pdf_path, needs_review_dict) + path, needs_review = controller.fill_form( + user_input=form.input_text, + fields=fetched_template.fields, + pdf_form_path=fetched_template.pdf_path, + ) - submission = FormSubmission(**form.model_dump(), output_pdf_path=path) + submission = FormSubmission( + **form.model_dump(), + output_pdf_path=path, + needs_review=needs_review or None, + ) return create_form(db, submission) diff --git a/api/schemas/forms.py b/api/schemas/forms.py index 3cce650..5448757 100644 --- a/api/schemas/forms.py +++ b/api/schemas/forms.py @@ -1,4 +1,5 @@ from pydantic import BaseModel +from typing import Optional class FormFill(BaseModel): template_id: int @@ -10,6 +11,7 @@ class FormFillResponse(BaseModel): template_id: int input_text: str output_pdf_path: str + needs_review: Optional[dict] = None # Fields the LLM was not confident about; must be verified by a human class Config: from_attributes = True \ No newline at end of file diff --git a/src/file_manipulator.py b/src/file_manipulator.py index b7815cc..baf46c1 100644 --- a/src/file_manipulator.py +++ b/src/file_manipulator.py @@ -20,28 +20,31 @@ def create_template(self, pdf_path: str): def fill_form(self, user_input: str, fields: list, pdf_form_path: str): """ It receives the raw data, runs the PDF filling logic, - and returns the path to the newly created file. + and returns (output_pdf_path, needs_review) where needs_review is a dict + of {field_name: {suggested_value, confidence}} for human verification. """ print("[1] Received request from frontend.") print(f"[2] PDF template path: {pdf_form_path}") if not os.path.exists(pdf_form_path): print(f"Error: PDF template not found at {pdf_form_path}") - return None # Or raise an exception + return None, {} # Or raise an exception print("[3] Starting extraction and PDF filling process...") try: self.llm._target_fields = fields self.llm._transcript_text = user_input - output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm) + # filler.fill_form now returns a tuple: (output_pdf_path, needs_review) + output_name, needs_review = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm) print("\n----------------------------------") print("✅ Process Complete.") print(f"Output saved to: {output_name}") + if needs_review: + print(f"⚠️ {len(needs_review)} field(s) flagged for human review: {list(needs_review.keys())}") - return output_name + return output_name, needs_review except Exception as e: print(f"An error occurred during PDF generation: {e}") - # Re-raise the exception so the frontend can handle it raise e diff --git a/src/filler.py b/src/filler.py index e31e535..c968b37 100644 --- a/src/filler.py +++ b/src/filler.py @@ -9,8 +9,14 @@ def __init__(self): def fill_form(self, pdf_form: str, llm: LLM): """ - Fill a PDF form with values from user_input using LLM. - Fields are filled in the visual order (top-to-bottom, left-to-right). + Fill a PDF form with confirmed high-confidence values from the LLM. + Fields flagged as low-confidence are written with a [REVIEW REQUIRED] placeholder + so reviewers can easily spot them in the document. + + Returns: + tuple: (output_pdf_path: str, needs_review: dict) + needs_review is a dict of {field_name: {suggested_value, confidence}} + that must be presented to the user for manual verification. """ output_pdf = ( pdf_form[:-4] @@ -19,34 +25,32 @@ def fill_form(self, pdf_form: str, llm: LLM): + "_filled.pdf" ) - # Generate dictionary of answers from your original function + # Run LLM extraction — populates both confirmed and needs_review buckets t2j = llm.main_loop() - textbox_answers = t2j.get_data() # This is a dictionary + confirmed_answers = t2j.get_data() # high-confidence fields + needs_review = t2j.get_needs_review() # low-confidence fields - answers_list = list(textbox_answers.values()) + # Merge all field names so we can look up values by name + all_answers = dict(confirmed_answers) + all_answers.update({ + field: "[REVIEW REQUIRED]" + for field in needs_review + }) # Read PDF pdf = PdfReader(pdf_form) - # Loop through pages + # Loop through pages and fill by field name (annot.T) not by position for page in pdf.pages: if page.Annots: - sorted_annots = sorted( - page.Annots, key=lambda a: (-float(a.Rect[1]), float(a.Rect[0])) - ) - - i = 0 - for annot in sorted_annots: + for annot in page.Annots: if annot.Subtype == "/Widget" and annot.T: - if i < len(answers_list): - annot.V = f"{answers_list[i]}" + # annot.T is a PDF string like "(FieldName)"; strip the parens + field_name = str(annot.T).strip("()") + if field_name in all_answers: + annot.V = str(all_answers[field_name]) annot.AP = None - i += 1 - else: - # Stop if we run out of answers - break PdfWriter().write(output_pdf, pdf) - # Your main.py expects this function to return the path - return output_pdf + return output_pdf, needs_review diff --git a/src/llm.py b/src/llm.py index 70937f9..011e1df 100644 --- a/src/llm.py +++ b/src/llm.py @@ -4,12 +4,15 @@ class LLM: + CONFIDENCE_THRESHOLD = 0.85 + def __init__(self, transcript_text=None, target_fields=None, json=None): if json is None: json = {} self._transcript_text = transcript_text # str self._target_fields = target_fields # List, contains the template field. - self._json = json # dictionary + self._json = json # dictionary: confirmed fields (confidence >= threshold) + self._needs_review = {} # fields with low confidence that a human must verify def type_check_all(self): if type(self._transcript_text) is not str: @@ -26,19 +29,29 @@ def type_check_all(self): def build_prompt(self, current_field): """ This method is in charge of the prompt engineering. It creates a specific prompt for each target field. + Returns a structured JSON object with value and confidence so that downstream logic can apply + a human-in-the-loop review for hallucinated or low-confidence fields. @params: current_field -> represents the current element of the json that is being prompted. """ - prompt = f""" + prompt = f""" SYSTEM PROMPT: - You are an AI assistant designed to help fillout json files with information extracted from transcribed voice recordings. - You will receive the transcription, and the name of the JSON field whose value you have to identify in the context. Return - only a single string containing the identified value for the JSON field. - If the field name is plural, and you identify more than one possible value in the text, return both separated by a ";". - If you don't identify the value in the provided text, return "-1". + You are an AI assistant designed to help fill out JSON files with information extracted from transcribed voice recordings. + You will receive the transcription and the name of the JSON field whose value you must identify. + + You MUST respond with a single valid JSON object and nothing else. The JSON must have exactly two keys: + - "value": the identified string value for the field, or null if not found. + - "confidence": a float between 0.0 and 1.0 representing how certain you are. + + Rules: + - If the field is plural and you find multiple values, separate them with ";" in the value string. + - If you cannot find the value, set "value" to null and "confidence" to 0.0. + - Do NOT add any explanation or text outside the JSON object. + + Example output: {{"value": "John Doe", "confidence": 0.95}} --- DATA: Target JSON field to find in text: {current_field} - + TEXT: {self._transcript_text} """ @@ -70,11 +83,10 @@ def main_loop(self): except requests.exceptions.HTTPError as e: raise RuntimeError(f"Ollama returned an error: {e}") - # parse response + # parse raw Ollama response json_data = response.json() - parsed_response = json_data["response"] - # print(parsed_response) - self.add_response_to_json(field, parsed_response) + raw_text = json_data["response"].strip() + self.add_response_to_json(field, raw_text) print("----------------------------------") print("\t[LOG] Resulting JSON created from the input text:") @@ -83,24 +95,45 @@ def main_loop(self): return self - def add_response_to_json(self, field, value): + def add_response_to_json(self, field, raw_text): """ - this method adds the following value under the specified field, - or under a new field if the field doesn't exist, to the json dict + Parses the structured JSON response from the LLM. + Confirmed fields (confidence >= CONFIDENCE_THRESHOLD) go into self._json. + Low-confidence fields go into self._needs_review for human verification. """ - value = value.strip().replace('"', "") - parsed_value = None - - if value != "-1": - parsed_value = value - - if ";" in value: - parsed_value = self.handle_plural_values(value) - - if field in self._json.keys(): - self._json[field].append(parsed_value) + import json as json_lib + value = None + confidence = 0.0 + + try: + # The LLM is prompted to always return a JSON object + parsed = json_lib.loads(raw_text) + value = parsed.get("value") + confidence = float(parsed.get("confidence", 0.0)) + except (json_lib.JSONDecodeError, ValueError, TypeError): + # If the LLM failed to return valid JSON, treat the whole text as a + # low-confidence raw string so it gets flagged for human review. + print(f"\t[WARN]: LLM returned non-JSON for field '{field}'. Flagging for review.") + value = raw_text if raw_text not in ("-1", "null", "") else None + confidence = 0.0 + + # Handle plural values (semicolon-separated) + if value and ";" in str(value): + value = self.handle_plural_values(value) + + if confidence >= self.CONFIDENCE_THRESHOLD: + # High-confidence: write directly into the confirmed JSON + if field in self._json: + self._json[field].append(value) + else: + self._json[field] = value else: - self._json[field] = parsed_value + # Low-confidence: flag for human-in-the-loop review + print(f"\t[REVIEW REQUIRED]: Field '{field}' has confidence {confidence:.2f} (threshold: {self.CONFIDENCE_THRESHOLD}). Value: '{value}'") + self._needs_review[field] = { + "suggested_value": value, + "confidence": confidence, + } return @@ -132,4 +165,9 @@ def handle_plural_values(self, plural_value): return values def get_data(self): + """Returns confirmed high-confidence field values.""" return self._json + + def get_needs_review(self): + """Returns fields that could not be extracted with sufficient confidence and require human review.""" + return self._needs_review diff --git a/tests/test_llm_confidence.py b/tests/test_llm_confidence.py new file mode 100644 index 0000000..c66760e --- /dev/null +++ b/tests/test_llm_confidence.py @@ -0,0 +1,112 @@ +""" +Tests for LLM confidence validation and human-in-the-loop review logic. + +These tests verify that: +1. High-confidence fields are written into the PDF normally. +2. Low-confidence fields are separated into the needs_review bucket. +3. LLM non-JSON responses are safely caught and flagged for review. +4. The CONFIDENCE_THRESHOLD constant is applied correctly. +""" + +import json +import pytest +from unittest.mock import patch, MagicMock +from src.llm import LLM + + +CONFIDENCE_THRESHOLD = LLM.CONFIDENCE_THRESHOLD # 0.85 + + +def make_llm(fields: dict) -> LLM: + """Helper: Create an LLM instance with a dummy transcript and target fields.""" + return LLM( + transcript_text="The employee is John Doe. His badge number is 12345.", + target_fields=fields, + json={}, + ) + + +def mock_ollama_response(value, confidence): + """Helper: Build a mock requests.Response for the Ollama API.""" + mock_resp = MagicMock() + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "response": json.dumps({"value": value, "confidence": confidence}) + } + return mock_resp + + +class TestAddResponseToJson: + """Unit tests for LLM.add_response_to_json()""" + + def test_high_confidence_field_goes_into_confirmed(self): + """A field with confidence >= threshold must end up in _json (confirmed).""" + llm = make_llm({"employee_name": None}) + raw = json.dumps({"value": "John Doe", "confidence": 0.95}) + llm.add_response_to_json("employee_name", raw) + + assert llm.get_data()["employee_name"] == "John Doe" + assert "employee_name" not in llm.get_needs_review() + + def test_low_confidence_field_goes_into_needs_review(self): + """A field with confidence < threshold must end up in needs_review, not _json.""" + llm = make_llm({"badge_number": None}) + raw = json.dumps({"value": "99999", "confidence": 0.50}) + llm.add_response_to_json("badge_number", raw) + + assert "badge_number" not in llm.get_data() + review = llm.get_needs_review() + assert "badge_number" in review + assert review["badge_number"]["suggested_value"] == "99999" + assert review["badge_number"]["confidence"] == pytest.approx(0.50) + + def test_null_value_low_confidence_is_flagged(self): + """A field where LLM says it couldn't find the value should be flagged.""" + llm = make_llm({"incident_code": None}) + raw = json.dumps({"value": None, "confidence": 0.0}) + llm.add_response_to_json("incident_code", raw) + + assert "incident_code" not in llm.get_data() + assert "incident_code" in llm.get_needs_review() + + def test_non_json_response_is_safely_caught_and_flagged(self): + """If the LLM returns garbage (not JSON), it must be caught and flagged — not crash.""" + llm = make_llm({"address": None}) + llm.add_response_to_json("address", "Sorry, I don't know the address.") + + assert "address" not in llm.get_data() + assert "address" in llm.get_needs_review() + + def test_exactly_at_threshold_is_confirmed(self): + """A field with confidence exactly equal to the threshold is confirmed (not flagged).""" + llm = make_llm({"date": None}) + raw = json.dumps({"value": "01/02/2005", "confidence": CONFIDENCE_THRESHOLD}) + llm.add_response_to_json("date", raw) + + assert llm.get_data()["date"] == "01/02/2005" + assert "date" not in llm.get_needs_review() + + +class TestMainLoop: + """Integration tests for LLM.main_loop() with a mocked Ollama API.""" + + @patch("src.llm.requests.post") + def test_main_loop_separates_confirmed_and_flagged(self, mock_post): + """main_loop must correctly separate high/low confidence fields from a real loop.""" + fields = {"employee_name": None, "badge_number": None} + llm = make_llm(fields) + + # employee_name returns high confidence; badge_number returns low + mock_post.side_effect = [ + mock_ollama_response("John Doe", 0.97), + mock_ollama_response("???", 0.40), + ] + + llm.main_loop() + + assert llm.get_data().get("employee_name") == "John Doe" + assert "employee_name" not in llm.get_needs_review() + + assert "badge_number" not in llm.get_data() + assert "badge_number" in llm.get_needs_review() + assert llm.get_needs_review()["badge_number"]["confidence"] == pytest.approx(0.40)