From c8232aa2bde0583ecbcff2241898a513a044206a Mon Sep 17 00:00:00 2001 From: Gopi Rk Date: Sat, 7 Mar 2026 14:09:03 +0530 Subject: [PATCH] feat(api): Interactive Voice Feedback Loop for Missing Incident Data (#106) Implemented stateful forms to support iterative data collection: - FormSubmission now stores status, extracted_data, and missing_fields - Decoupled LLM.main_loop() extraction from Filler.fill_form() to allow partial states - POST /forms/fill now checks for missing fields. Returns status: missing_data and pauses execution if data is incomplete. - Added POST /forms/{id}/feedback endpoint to accept follow-up user input, targeting only the previously missing_fields. - Generates the final PDF only when all fields are successfully extracted and status: completed. - Added mock-driven integration tests for the feedback loop flow. --- api/db/models.py | 5 ++- api/routes/forms.py | 70 ++++++++++++++++++++++++++--- api/schemas/forms.py | 8 +++- src/controller.py | 7 ++- src/file_manipulator.py | 30 +++++++------ src/filler.py | 10 ++--- src/llm.py | 9 +++- tests/test_forms.py | 97 ++++++++++++++++++++++++++++++----------- 8 files changed, 179 insertions(+), 57 deletions(-) diff --git a/api/db/models.py b/api/db/models.py index f76c93b..0589d1f 100644 --- a/api/db/models.py +++ b/api/db/models.py @@ -14,5 +14,8 @@ class FormSubmission(SQLModel, table=True): id: int | None = Field(default=None, primary_key=True) template_id: int input_text: str - output_pdf_path: str + output_pdf_path: str | None = None + status: str = Field(default="completed") + extracted_data: dict = Field(default_factory=dict, sa_column=Column(JSON)) + missing_fields: list = Field(default_factory=list, sa_column=Column(JSON)) created_at: datetime = Field(default_factory=datetime.utcnow) \ No newline at end of file diff --git a/api/routes/forms.py b/api/routes/forms.py index f3430ed..b96f6e8 100644 --- a/api/routes/forms.py +++ b/api/routes/forms.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, Depends from sqlmodel import Session from api.deps import get_db -from api.schemas.forms import FormFill, FormFillResponse +from api.schemas.forms import FormFill, FormFeedback, FormFillResponse from api.db.repositories import create_form, get_template from api.db.models import FormSubmission from api.errors.base import AppError @@ -11,15 +11,73 @@ @router.post("/fill", response_model=FormFillResponse) def fill_form(form: FormFill, db: Session = Depends(get_db)): - if not get_template(db, form.template_id): - raise AppError("Template not found", status_code=404) - fetched_template = get_template(db, form.template_id) + if not fetched_template: + raise AppError("Template not found", status_code=404) controller = Controller() - path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path) + + extracted_data, missing_fields = controller.extract_data( + user_input=form.input_text, + fields=fetched_template.fields + ) - submission = FormSubmission(**form.model_dump(), output_pdf_path=path) + if missing_fields: + status = "missing_data" + path = None + else: + status = "completed" + path = controller.fill_pdf(answers=extracted_data, pdf_form_path=fetched_template.pdf_path) + + submission = FormSubmission( + template_id=form.template_id, + input_text=form.input_text, + output_pdf_path=path, + status=status, + extracted_data=extracted_data, + missing_fields=missing_fields + ) return create_form(db, submission) +@router.post("/{submission_id}/feedback", response_model=FormFillResponse) +def form_feedback(submission_id: int, feedback: FormFeedback, db: Session = Depends(get_db)): + submission = db.get(FormSubmission, submission_id) + if not submission: + raise AppError("Form submission not found", status_code=404) + + if submission.status == "completed": + raise AppError("Form already completed", status_code=400) + + fetched_template = get_template(db, submission.template_id) + if not fetched_template: + raise AppError("Template not found", status_code=404) + + controller = Controller() + + # Only target missing fields from the template + target_fields = {field: fetched_template.fields[field] for field in submission.missing_fields if field in fetched_template.fields} + + extracted_data, missing_fields = controller.extract_data( + user_input=feedback.input_text, + fields=target_fields, + existing_data=submission.extracted_data + ) + + if missing_fields: + submission.status = "missing_data" + submission.output_pdf_path = None + else: + submission.status = "completed" + submission.output_pdf_path = controller.fill_pdf(answers=extracted_data, pdf_form_path=fetched_template.pdf_path) + + submission.extracted_data = extracted_data + submission.missing_fields = missing_fields + + db.add(submission) + db.commit() + db.refresh(submission) + + return submission + + diff --git a/api/schemas/forms.py b/api/schemas/forms.py index 3cce650..60f25d1 100644 --- a/api/schemas/forms.py +++ b/api/schemas/forms.py @@ -1,15 +1,21 @@ from pydantic import BaseModel +from typing import Optional class FormFill(BaseModel): template_id: int input_text: str +class FormFeedback(BaseModel): + input_text: str class FormFillResponse(BaseModel): id: int template_id: int input_text: str - output_pdf_path: str + status: str + output_pdf_path: Optional[str] = None + extracted_data: dict = {} + missing_fields: list = [] class Config: from_attributes = True \ No newline at end of file diff --git a/src/controller.py b/src/controller.py index d31ec9c..4679fe8 100644 --- a/src/controller.py +++ b/src/controller.py @@ -4,8 +4,11 @@ class Controller: def __init__(self): self.file_manipulator = FileManipulator() - def fill_form(self, user_input: str, fields: list, pdf_form_path: str): - return self.file_manipulator.fill_form(user_input, fields, pdf_form_path) + def extract_data(self, user_input: str, fields: dict, existing_data: dict = None): + return self.file_manipulator.extract_data(user_input, fields, existing_data) + + def fill_pdf(self, answers: dict, pdf_form_path: str): + return self.file_manipulator.fill_pdf(answers, pdf_form_path) def create_template(self, pdf_path: str): return self.file_manipulator.create_template(pdf_path) \ No newline at end of file diff --git a/src/file_manipulator.py b/src/file_manipulator.py index b7815cc..1278077 100644 --- a/src/file_manipulator.py +++ b/src/file_manipulator.py @@ -17,23 +17,28 @@ def create_template(self, pdf_path: str): prepare_form(pdf_path, template_path) return template_path - def fill_form(self, user_input: str, fields: list, pdf_form_path: str): + def extract_data(self, user_input: str, fields: dict, existing_data: dict = None): """ - It receives the raw data, runs the PDF filling logic, - and returns the path to the newly created file. + Runs LLM to extract data. Returns extracted_data and missing_fields. """ - print("[1] Received request from frontend.") - print(f"[2] PDF template path: {pdf_form_path}") - + print("[1] Starting extraction process...") + if existing_data is None: + existing_data = {} + + llm = LLM(transcript_text=user_input, target_fields=fields, json=existing_data) + llm.main_loop() + return llm.get_data(), llm.get_missing_fields() + + def fill_pdf(self, answers: dict, pdf_form_path: str): + """ + Receives extracted data and fills the PDF. + """ + print(f"[2] Filling PDF template: {pdf_form_path}") if not os.path.exists(pdf_form_path): - print(f"Error: PDF template not found at {pdf_form_path}") - return None # Or raise an exception + raise FileNotFoundError(f"PDF template not found at {pdf_form_path}") - print("[3] Starting extraction and PDF filling process...") try: - self.llm._target_fields = fields - self.llm._transcript_text = user_input - output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm) + output_name = self.filler.fill_form(pdf_form=pdf_form_path, answers=answers) print("\n----------------------------------") print("✅ Process Complete.") @@ -43,5 +48,4 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str): except Exception as e: print(f"An error occurred during PDF generation: {e}") - # Re-raise the exception so the frontend can handle it raise e diff --git a/src/filler.py b/src/filler.py index e31e535..f825196 100644 --- a/src/filler.py +++ b/src/filler.py @@ -7,9 +7,9 @@ class Filler: def __init__(self): pass - def fill_form(self, pdf_form: str, llm: LLM): + def fill_form(self, pdf_form: str, answers: dict): """ - Fill a PDF form with values from user_input using LLM. + Fill a PDF form with values from answers dictionary. Fields are filled in the visual order (top-to-bottom, left-to-right). """ output_pdf = ( @@ -19,11 +19,7 @@ def fill_form(self, pdf_form: str, llm: LLM): + "_filled.pdf" ) - # Generate dictionary of answers from your original function - t2j = llm.main_loop() - textbox_answers = t2j.get_data() # This is a dictionary - - answers_list = list(textbox_answers.values()) + answers_list = list(answers.values()) # Read PDF pdf = PdfReader(pdf_form) diff --git a/src/llm.py b/src/llm.py index 70937f9..870e10a 100644 --- a/src/llm.py +++ b/src/llm.py @@ -10,6 +10,7 @@ def __init__(self, transcript_text=None, target_fields=None, json=None): self._transcript_text = transcript_text # str self._target_fields = target_fields # List, contains the template field. self._json = json # dictionary + self._missing_fields = [] def type_check_all(self): if type(self._transcript_text) is not str: @@ -72,8 +73,9 @@ def main_loop(self): # parse response json_data = response.json() - parsed_response = json_data["response"] - # print(parsed_response) + parsed_response = json_data["response"].strip() + if parsed_response.replace('"', "") == "-1": + self._missing_fields.append(field) self.add_response_to_json(field, parsed_response) print("----------------------------------") @@ -133,3 +135,6 @@ def handle_plural_values(self, plural_value): def get_data(self): return self._json + + def get_missing_fields(self): + return self._missing_fields diff --git a/tests/test_forms.py b/tests/test_forms.py index 8f432bf..aa9ad25 100644 --- a/tests/test_forms.py +++ b/tests/test_forms.py @@ -1,25 +1,72 @@ -def test_submit_form(client): - pass - # First create a template - # form_payload = { - # "template_id": 3, - # "input_text": "Hi. The employee's name is John Doe. His job title is managing director. His department supervisor is Jane Doe. His phone number is 123456. His email is jdoe@ucsc.edu. The signature is , and the date is 01/02/2005", - # } - - # template_res = client.post("/templates/", json=template_payload) - # template_id = template_res.json()["id"] - - # # Submit a form - # form_payload = { - # "template_id": template_id, - # "data": {"rating": 5, "comment": "Great service"}, - # } - - # response = client.post("/forms/", json=form_payload) - - # assert response.status_code == 200 - - # data = response.json() - # assert data["id"] is not None - # assert data["template_id"] == template_id - # assert data["data"] == form_payload["data"] +from unittest.mock import patch, MagicMock + +def test_interactive_feedback_loop(client): + # 1) Create a template + template_payload = { + "name": "Test Form", + "pdf_path": "src/inputs/test.pdf", + "fields": { + "Employee name": "", + "Job title": "" + } + } + + with patch("api.routes.templates.prepare_form") as mock_prepare: + template_res = client.post("/templates/create", json=template_payload) + template_id = template_res.json()["id"] + + # 2) First Fill (Missing "Job title") + form_payload = { + "template_id": template_id, + "input_text": "The employee name is John Doe." + } + + def mock_ollama_call(*args, **kwargs): + json_payload = kwargs.get("json", {}) + prompt = json_payload.get("prompt", "") + + mock_response = MagicMock() + if "Employee name" in prompt: + mock_response.json.return_value = {"response": "John Doe"} + else: + mock_response.json.return_value = {"response": "-1"} # Missing + return mock_response + + with patch("src.llm.requests.post", side_effect=mock_ollama_call): + with patch("os.path.exists", return_value=True): + response = client.post("/forms/fill", json=form_payload) + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "missing_data" + assert "Job title" in data["missing_fields"] + assert data["extracted_data"]["Employee name"] == "John Doe" + submission_id = data["id"] + + # 3) Feedback (Providing "Job title") + feedback_payload = { + "input_text": "His job title is Engineer." + } + + def mock_ollama_feedback(*args, **kwargs): + json_payload = kwargs.get("json", {}) + prompt = json_payload.get("prompt", "") + + mock_response = MagicMock() + if "Job title" in prompt: + mock_response.json.return_value = {"response": "Engineer"} + else: + mock_response.json.return_value = {"response": "-1"} + return mock_response + + with patch("src.llm.requests.post", side_effect=mock_ollama_feedback): + with patch("src.filler.Filler.fill_form", return_value="output_path.pdf"): + with patch("os.path.exists", return_value=True): + feedback_res = client.post(f"/forms/{submission_id}/feedback", json=feedback_payload) + + assert feedback_res.status_code == 200 + feedback_data = feedback_res.json() + assert feedback_data["status"] == "completed" + assert len(feedback_data["missing_fields"]) == 0 + assert feedback_data["extracted_data"]["Job title"] == "Engineer" + assert feedback_data["extracted_data"]["Employee name"] == "John Doe"