From 4d4573bdd805c24f2c034ad2101408f95d4c3da8 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Fri, 9 Jan 2026 11:46:45 -0500 Subject: [PATCH 01/32] Add proposed_changes table to models/utils --- src/pir_pipeline/models/pir_sql_models.py | 11 ++++++++++- src/pir_pipeline/utils/SQLAlchemyUtils.py | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/pir_pipeline/models/pir_sql_models.py b/src/pir_pipeline/models/pir_sql_models.py index c2cd402..c765d81 100644 --- a/src/pir_pipeline/models/pir_sql_models.py +++ b/src/pir_pipeline/models/pir_sql_models.py @@ -10,12 +10,13 @@ String, Table, Text, + UniqueConstraint, func, null, or_, select, - UniqueConstraint, ) +from sqlalchemy.dialects.postgresql.json import JSONB from pir_pipeline.utils.sql_alchemy_view import view @@ -103,6 +104,14 @@ Column("complete_series_flag", Integer, default=0), ) +proposed_changes = Table( + "proposed_changes", + sql_metadata, + Column("id", String(255), primary_key=True), + Column("link_dict", JSONB), + Column("html", Text), +) + # Confirmed records should be excluded confirmed_subquery = ( select(uqid_changelog.c["original_uqid"]) diff --git a/src/pir_pipeline/utils/SQLAlchemyUtils.py b/src/pir_pipeline/utils/SQLAlchemyUtils.py index 301cebe..5b5de9a 100644 --- a/src/pir_pipeline/utils/SQLAlchemyUtils.py +++ b/src/pir_pipeline/utils/SQLAlchemyUtils.py @@ -14,6 +14,7 @@ confirmed, linked, program, + proposed_changes, question, response, sql_metadata, @@ -67,6 +68,7 @@ def __init__(self, user: str, password: str, host: str, port: int, database: str "response": response, "question": question, "program": program, + "proposed_changes": proposed_changes, "linked": linked, "unlinked": unlinked, "uqid_changelog": uqid_changelog, From b47608218e6741b897657849c8a614a78a549812 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Mon, 12 Jan 2026 16:00:18 -0500 Subject: [PATCH 02/32] PIRLinker: Remove redundant get_question_data code --- src/pir_pipeline/linking/PIRLinker.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/pir_pipeline/linking/PIRLinker.py b/src/pir_pipeline/linking/PIRLinker.py index ed8eb0a..015dd23 100644 --- a/src/pir_pipeline/linking/PIRLinker.py +++ b/src/pir_pipeline/linking/PIRLinker.py @@ -284,7 +284,6 @@ def join_on_type_and_section(self, which: str): )["year"].tolist() self._cross = df[df["year_y"].map(lambda x: x not in years)] self._cross = self._cross[self._cross["uqid_x"] != self._cross["uqid_y"]] - # Remove cases where unique_question_id combination is duplicated if len(unique_ids) == 1: for ident in ["question_id", "uqid"]: @@ -322,11 +321,6 @@ def confirm_link(row: pd.Series): self.question_data_check() - try: - self._question - except AttributeError: - self.get_question_data() - try: self._cross except AttributeError: From 79d7d844b0f196c98a4390d4f8471c896f2f94e8 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Mon, 12 Jan 2026 16:02:11 -0500 Subject: [PATCH 03/32] DashboardUtils: Create id_column dynamically in .search_matches --- src/pir_pipeline/utils/dashboard_utils.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/pir_pipeline/utils/dashboard_utils.py b/src/pir_pipeline/utils/dashboard_utils.py index 65ae987..453f265 100644 --- a/src/pir_pipeline/utils/dashboard_utils.py +++ b/src/pir_pipeline/utils/dashboard_utils.py @@ -248,7 +248,7 @@ def get_where_condition( return (id_column, next_id) -def search_matches(matches: dict, id_column: str, db: SQLAlchemyUtils) -> dict: +def search_matches(matches: dict, db: SQLAlchemyUtils) -> dict: """Iterate over matches to return all related rows Args: @@ -261,6 +261,7 @@ def search_matches(matches: dict, id_column: str, db: SQLAlchemyUtils) -> dict: """ output = {} for match in matches: + id_column = "uqid" if match.get("uqid") else "question_id" output.update(get_search_results(match[id_column], db, id_column)) return output @@ -565,9 +566,5 @@ def update_links(self): if __name__ == "__main__": from pir_pipeline.config import DB_CONFIG - db = SQLAlchemyUtils(**DB_CONFIG, database="pir_test") - print( - get_search_results("903863a832c884bdf311237ed570c44d", db)[ - "695fed8b2a237b322fcc5bd7fa52384e" - ] - ) + db = SQLAlchemyUtils(**DB_CONFIG, database="pir") + get_search_results("child", db) From 422173b33ba1afd345be392bfcd32a000d040142 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Mon, 12 Jan 2026 16:02:47 -0500 Subject: [PATCH 04/32] Dashboard: Refactor review confirm Refactor the review confirm button to send records to link_dict rather than to the database directly. --- src/pir_pipeline/dashboard/review.py | 18 +++++++- .../dashboard/static/review/flashcard.js | 2 +- tests/dashboard/test_review_ui.py | 43 +++++++++++++++---- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 5016ad2..33e7d8b 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -46,7 +46,7 @@ def get_flashcard_question( if matches and len(matches) > 1: matches.pop(0) - output["matches"] = search_matches(matches, id_column, db) + output["matches"] = search_matches(matches, db) elif matches and len(matches) == 1: output["matches"] = {"columns": output["question"]["columns"]} else: @@ -131,7 +131,6 @@ def link(): """Handle storage of link/unlink actions""" payload = request.get_json() action = payload["action"] - # Add a link/unlink entry to session if action == "build": data = payload["data"] @@ -149,6 +148,21 @@ def link(): link_dict = OrderedDict({dict_id: data}) session["link_dict"] = link_dict message = f"Data {data} queued for linking" + elif action == "store": + db = get_db() + link_dict = session["link_dict"] + proposed_id = sha1("".join(link_dict.keys()).encode()).hexdigest() + db.insert_records( + [ + { + "id": proposed_id, + "link_dict": json.dumps(link_dict), + "html": payload["action"], + } + ], + "proposed_changes", + ) + del session["link_dict"] # Execute all linking actions elif action == "finalize": diff --git a/src/pir_pipeline/dashboard/static/review/flashcard.js b/src/pir_pipeline/dashboard/static/review/flashcard.js index 4871b06..eb7d259 100644 --- a/src/pir_pipeline/dashboard/static/review/flashcard.js +++ b/src/pir_pipeline/dashboard/static/review/flashcard.js @@ -73,7 +73,7 @@ async function flashcardAction(e) { // Perform all linking actions to this points payload = { - "action": "finalize" + "action": "store" } await fetch("/review/link", { diff --git a/tests/dashboard/test_review_ui.py b/tests/dashboard/test_review_ui.py index 5634b04..36754af 100644 --- a/tests/dashboard/test_review_ui.py +++ b/tests/dashboard/test_review_ui.py @@ -1,4 +1,6 @@ +import json import os +import time import pytest from selenium.webdriver.common.by import By @@ -26,6 +28,11 @@ def count_modal_rows(table: str): return count + question_uqid_query = "SELECT question, uqid FROM question" + with sql_utils.engine.connect() as conn: + result = conn.execute(text(question_uqid_query)) + question_uqid_pairs = result.fetchall() + driver.get("http://127.0.0.1:5000/review") # Wait for the search results table and rows to appear @@ -50,7 +57,7 @@ def count_modal_rows(table: str): By.CSS_SELECTOR, "tr#flashcard-matches-table-tr-10000 td[name='question_id']", ).get_attribute("textContent") - == "0e93c25d3a95604f40d3a64e2298093b4faed6f2" + == "d27e8217ba30000a78e5d92ea54f4d9a2e69cb54" ) # Click the storeLink button in the first row @@ -117,12 +124,14 @@ def count_modal_rows(table: str): next_button.click() # Click the 'Previous' button + time.sleep(1) prev_button = driver.find_element( By.CSS_SELECTOR, 'form#flashcard-buttons button[value="previous"]' ) prev_button.click() # Extract the Question ID + time.sleep(1) question_id_element = driver.find_element( By.CSS_SELECTOR, '#flashcard-question-table td[name="question_id"]' ) @@ -131,19 +140,37 @@ def count_modal_rows(table: str): assert ( init_question_id == question_id - ), "Previous and Next buttons are not working fine" + ), f"Previous and Next buttons malfunctioning: initial question: {init_question_id}, current question: {question_id}" # Check the confirm changes button confirm_button = driver.find_element(By.ID, "confirm-button") confirm_button.click() with sql_utils.engine.connect() as conn: - result = conn.execute(text("SELECT question_id FROM confirmed")) - question_ids = result.scalars() - - assert ( - question_id in question_ids - ), f"{question_id} is not in confirmed IDs: {question_ids}" + # Confirm there is a record in proposed_changes + result = conn.execute(text("SELECT link_dict FROM proposed_changes")) + link_dict = result.scalar() + link_dict = json.loads(link_dict) + question_ids = [] + for link in link_dict.values(): + for qid in ["base_question_id", "match_question_id"]: + question_ids.append(link[qid]) + + assert ( + question_id in question_ids + ), f"{question_id} is not in IDs proposed for linkage: {question_ids}" + + # Confirm no uqids changed + result = conn.execute(text(question_uqid_query)) + records = result.fetchall() + sorted_records = sorted(records) + different_pairs = set(sorted_records).difference(set(question_uqid_pairs)) + assert not different_pairs, "question_id/uqid pairs have changed: {}" + + # Confirm no records in uqid_changelog + result = conn.execute(text("SELECT * FROM uqid_changelog")) + records = result.fetchall() + assert not records, f"Some records in uqid_changelog: {records}" if __name__ == "__main__": From 0afc629a1c83fe9e1dfdca7e345ab001769b4d95 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Mon, 12 Jan 2026 16:04:07 -0500 Subject: [PATCH 05/32] Search: Refactor confirm button Refactor the search confirm button to go to link_dict rather than to the database directly. --- src/pir_pipeline/dashboard/search.py | 2 +- .../dashboard/static/search/search.js | 2 +- tests/dashboard/test_search_ui.py | 50 ++++++++++++++----- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/pir_pipeline/dashboard/search.py b/src/pir_pipeline/dashboard/search.py index b82931a..57a096c 100644 --- a/src/pir_pipeline/dashboard/search.py +++ b/src/pir_pipeline/dashboard/search.py @@ -35,7 +35,7 @@ def get_flashcard_question(offset: int | str, id_column: str, db: SQLAlchemyUtil if matches and len(matches) > 1: matches.pop(0) - output["matches"] = search_matches(matches, id_column, db) + output["matches"] = search_matches(matches, db) elif matches and len(matches) == 1: output["matches"] = {"columns": output["question"]["columns"]} else: diff --git a/src/pir_pipeline/dashboard/static/search/search.js b/src/pir_pipeline/dashboard/static/search/search.js index 0796ac5..332216f 100644 --- a/src/pir_pipeline/dashboard/static/search/search.js +++ b/src/pir_pipeline/dashboard/static/search/search.js @@ -81,7 +81,7 @@ function commitChanges(e) { // Commit changes to the database const payload = { - "action": "finalize", + "action": "store", "data": "" } diff --git a/tests/dashboard/test_search_ui.py b/tests/dashboard/test_search_ui.py index dbe7474..6fac5eb 100644 --- a/tests/dashboard/test_search_ui.py +++ b/tests/dashboard/test_search_ui.py @@ -1,4 +1,6 @@ +import json import os +import time import pytest from selenium.common.exceptions import TimeoutException @@ -66,7 +68,7 @@ def count_modal_rows(table: str): EC.element_to_be_clickable( ( By.XPATH, - '//table[@id="search-results-table"]//button[@onclick="getFlashcardData(event)"]', + '//table[@id="search-results-table"]//tr[@id="search-results-table-tr-10001"]//button[@onclick="getFlashcardData(event)"]', ) ) ) @@ -86,8 +88,11 @@ def count_modal_rows(table: str): assert modal_visible, "Edit modal did not appear after clicking Edit button." # Click the storeLink button in the first row + time.sleep(1) + question_rows_init = count_modal_rows("question") match_rows_init = count_modal_rows("matches") + storelink_button = wait.until( EC.element_to_be_clickable( ( @@ -141,27 +146,46 @@ def count_modal_rows(table: str): ) ) storelink_button.click() - init_question_ids = driver.find_elements( + + question_id_element = driver.find_element( By.CSS_SELECTOR, '#flashcard-question-table td[name="question_id"]' ) - init_question_ids = set( - [question.get_attribute("textContent") for question in init_question_ids] - ) + question_id = question_id_element.get_attribute("textContent").strip() confirm_button = driver.find_element(value="confirm-button") confirm_button.click() + question_uqid_query = "SELECT question, uqid FROM question" with sql_utils.engine.connect() as conn: - result = conn.execute(text("SELECT question_id FROM uqid_changelog")) - question_ids = result.scalars() + result = conn.execute(text(question_uqid_query)) + question_uqid_pairs = result.fetchall() - question_ids = set(question_ids) - - assert question_ids.issuperset( - init_question_ids - ), f"question_id set is not a superset of init_question_id set {question_ids.symmetric_difference(init_question_ids)}" + with sql_utils.engine.connect() as conn: + # Confirm there is a record in proposed_changes + result = conn.execute(text("SELECT link_dict FROM proposed_changes")) + link_dict = result.scalar() + link_dict = json.loads(link_dict) + question_ids = [] + for link in link_dict.values(): + for qid in ["base_question_id", "match_question_id"]: + question_ids.append(link[qid]) + + assert ( + question_id in question_ids + ), f"{question_id} is not in IDs proposed for linkage: {question_ids}" + + # Confirm no uqids changed + result = conn.execute(text(question_uqid_query)) + records = result.fetchall() + sorted_records = sorted(records) + different_pairs = set(sorted_records).difference(set(question_uqid_pairs)) + assert not different_pairs, "question_id/uqid pairs have changed: {}" + + # Confirm no records in uqid_changelog + result = conn.execute(text("SELECT * FROM uqid_changelog")) + records = result.fetchall() + assert not records, f"Some records in uqid_changelog: {records}" if __name__ == "__main__": pytest.main([__file__, "-sk", "test_search_ui"]) - pytest.main([__file__, "-sk", "test_search_ui"]) From 799fb3c3d57b6b807146fe2cbfd5084e00417978 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Tue, 13 Jan 2026 16:32:49 -0500 Subject: [PATCH 06/32] Dashboard: send question table HTML with store action --- src/pir_pipeline/dashboard/static/review/flashcard.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pir_pipeline/dashboard/static/review/flashcard.js b/src/pir_pipeline/dashboard/static/review/flashcard.js index eb7d259..3480988 100644 --- a/src/pir_pipeline/dashboard/static/review/flashcard.js +++ b/src/pir_pipeline/dashboard/static/review/flashcard.js @@ -73,7 +73,8 @@ async function flashcardAction(e) { // Perform all linking actions to this points payload = { - "action": "store" + "action": "store", + "html": questionTable.outerHTML } await fetch("/review/link", { From 68e2a4e2ce0e8d33f5075c9174084b003b154059 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Tue, 13 Jan 2026 16:33:23 -0500 Subject: [PATCH 07/32] Dashboard: index.py now creates missing tables --- src/pir_pipeline/dashboard/index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pir_pipeline/dashboard/index.py b/src/pir_pipeline/dashboard/index.py index 6396015..d9c5134 100644 --- a/src/pir_pipeline/dashboard/index.py +++ b/src/pir_pipeline/dashboard/index.py @@ -13,6 +13,7 @@ def index(): """Return the Home page""" db = get_db() + db.create_db() # Count of questions by year question = db.tables["question"] From 6416546a87285e112b4600ec47f424f380ff4782 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Tue, 13 Jan 2026 16:34:07 -0500 Subject: [PATCH 08/32] Dashboard: Handle incoming html in review.py --- src/pir_pipeline/dashboard/review.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 33e7d8b..3c0aba9 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -151,13 +151,20 @@ def link(): elif action == "store": db = get_db() link_dict = session["link_dict"] - proposed_id = sha1("".join(link_dict.keys()).encode()).hexdigest() + + ids = [ + record["base_question_id"] + record["match_question_id"] + for record in link_dict.items() + ] + ids.sort() + + proposed_id = sha1("".join(ids).encode()).hexdigest() db.insert_records( [ { "id": proposed_id, "link_dict": json.dumps(link_dict), - "html": payload["action"], + "html": payload["html"], } ], "proposed_changes", From ecc9b1bb440845a468b5fb2c7953afdaeed1c659 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Wed, 14 Jan 2026 16:11:40 -0500 Subject: [PATCH 09/32] Finalize: Render a table on the finalize page --- src/pir_pipeline/dashboard/__init__.py | 3 +- src/pir_pipeline/dashboard/finalize.py | 95 +++++++++++++++++++ .../dashboard/static/finalize/finalize.js | 64 +++++++++++++ .../dashboard/static/styles/finalize.css | 25 +++++ .../dashboard/templates/base.html | 3 + .../templates/finalize/finalize.html | 42 ++++++++ 6 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 src/pir_pipeline/dashboard/finalize.py create mode 100644 src/pir_pipeline/dashboard/static/finalize/finalize.js create mode 100644 src/pir_pipeline/dashboard/static/styles/finalize.css create mode 100644 src/pir_pipeline/dashboard/templates/finalize/finalize.html diff --git a/src/pir_pipeline/dashboard/__init__.py b/src/pir_pipeline/dashboard/__init__.py index fb5a54e..6e1996d 100644 --- a/src/pir_pipeline/dashboard/__init__.py +++ b/src/pir_pipeline/dashboard/__init__.py @@ -48,12 +48,13 @@ def create_app(test_config: dict = None, **kwargs) -> Flask: db.init_app(app) - from pir_pipeline.dashboard import index, review, search + from pir_pipeline.dashboard import finalize, index, review, search app.register_blueprint(index.bp) app.add_url_rule("/", endpoint="index") app.register_blueprint(search.bp) app.register_blueprint(review.bp) + app.register_blueprint(finalize.bp) return app diff --git a/src/pir_pipeline/dashboard/finalize.py b/src/pir_pipeline/dashboard/finalize.py new file mode 100644 index 0000000..1fc4ddd --- /dev/null +++ b/src/pir_pipeline/dashboard/finalize.py @@ -0,0 +1,95 @@ +import json +from collections.abc import Sequence +from math import ceil +from typing import Iterable, Optional + +from flask import Blueprint, render_template, request, session +from sqlalchemy import select, text + +from pir_pipeline.dashboard.db import get_db + +bp = Blueprint("finalize", __name__, url_prefix="/finalize") + + +class WrappedList(Sequence): + def __init__(self, iterable: Optional[Iterable]): + if isinstance(iterable, str): + self.collection = json.loads(iterable) + elif iterable: + self.collection = iterable + else: + self.collection = tuple() + + self.loc = 0 + self.max_index = self.__len__() - 1 + + def __getitem__(self, key): + return self.collection[key] + + def __len__(self): + return self.collection.__len__() + + @property + def current(self): + return self[self.loc] + + def next(self): + self.loc = self.loc + 1 if self.loc < self.max_index else 0 + return self[self.loc] + + def previous(self): + self.loc = self.loc - 1 if self.loc != 0 else self.max_index + return self[self.loc] + + def to_json(self): + return json.dumps(self.collection) + + +def get_page(): + db = get_db() + with db.engine.connect() as connection: + record_count = connection.execute( + text("SELECT COUNT(*) FROM proposed_changes") + ).scalar_one() + session["max_page"] = ceil(record_count / 10) + + return WrappedList(list(range(session["max_page"]))).to_json() + + +@bp.route("/", methods=["GET"]) +def index(): + session["finalize_page"] = 0 + session["number_displayed"] = 10 + session["page"] = get_page() + + return render_template("finalize/finalize.html") + + +@bp.route("/data", methods=["POST"]) +def data(): + db = get_db() + + number_displayed = session.get("number_displayed") + page: WrappedList = WrappedList(session.get("page", get_page())) + + response = request.get_json() + direction = response.get("direction") + + if direction is None: + pass + elif direction == "next": + page.next() + else: + page.previous() + + session["page"] = page.to_json() + + proposed_changes = db.tables["proposed_changes"] + query = ( + select(proposed_changes) + .limit(number_displayed) + .offset(page.current * number_displayed) + ) + records = db.get_records(query) + + return records.to_dict(orient="index") diff --git a/src/pir_pipeline/dashboard/static/finalize/finalize.js b/src/pir_pipeline/dashboard/static/finalize/finalize.js new file mode 100644 index 0000000..915d762 --- /dev/null +++ b/src/pir_pipeline/dashboard/static/finalize/finalize.js @@ -0,0 +1,64 @@ +import { updateFlashcardTables, rowToJSON, storeLink, buildTable, expandContractRow } from "../utilities.js"; + +// Run buildPage when the page is loaded +document.addEventListener("DOMContentLoaded", buildPage()) + +/** + * Populates the flashcard page with content + */ +function buildPage() { + // Compose request for finalize tables + const payload = { + "method": "POST", + "headers": { + "Content-type": "application/json" + }, + "body": JSON.stringify({}) + }; + + fetch("/finalize/data", payload) + .then(response => response.json()) + .then(data => insertFinalizeTables(data)); +} + +function insertFinalizeTables(data) { + const tableBox = document.querySelector(".table-box"); + tableBox.innerHTML = "" + + // https://stackoverflow.com/questions/494143/creating-a-new-dom-element-from-an-html-string-using-built-in-dom-methods-or-pro/35385518#35385518 + for (let key in data) { + const record = data[key]; + const template = document.createElement("template"); + template.innerHTML = record["html"]; + const tableHTML = template.content.firstChild; + tableHTML.id = record["id"]; + tableBox.appendChild(tableHTML); + }; +} + +function paginate(e) { + e.preventDefault(); + + // Get the form data + const name = e.submitter.name; + let value = e.submitter.value; + + const body = { + "direction": value + } + + const payload = { + "method": "POST", + "headers": { + "Content-type": "application/json" + }, + "body": JSON.stringify(body) + }; + + fetch("/finalize/data", payload) + .then(response => response.json()) + .then(data => insertFinalizeTables(data)); +} + +document.expandContractRow = expandContractRow; +document.paginate = paginate; \ No newline at end of file diff --git a/src/pir_pipeline/dashboard/static/styles/finalize.css b/src/pir_pipeline/dashboard/static/styles/finalize.css new file mode 100644 index 0000000..cc737da --- /dev/null +++ b/src/pir_pipeline/dashboard/static/styles/finalize.css @@ -0,0 +1,25 @@ +button { + background: none; + color: inherit; + border: none; + padding: 0; + font: inherit; + cursor: pointer; + outline: inherit; +} + +.footer { + position: fixed; + bottom: 0; + background-color: #C6C6C6; + width: 100%; + padding: 0em 5em; +} + +.form-parent { + margin:0; +} + +#keyword-search { + margin-top: 3em; +} \ No newline at end of file diff --git a/src/pir_pipeline/dashboard/templates/base.html b/src/pir_pipeline/dashboard/templates/base.html index 119945c..b53062a 100644 --- a/src/pir_pipeline/dashboard/templates/base.html +++ b/src/pir_pipeline/dashboard/templates/base.html @@ -33,6 +33,9 @@ + diff --git a/src/pir_pipeline/dashboard/templates/finalize/finalize.html b/src/pir_pipeline/dashboard/templates/finalize/finalize.html new file mode 100644 index 0000000..69b7c40 --- /dev/null +++ b/src/pir_pipeline/dashboard/templates/finalize/finalize.html @@ -0,0 +1,42 @@ +{% extends "base.html" %} + +{% block header %} + +

{% block title %}Finalize{% endblock %}

+{% endblock %} + +{% block content %} +
+

Finalize Changes

+
    +
  • Review the links proposed below.
  • +
  • + Confirm or deny each link +
      +
    • Click the Deny button to deny a link, removing it from the list of proposed changes.
    • +
    • Click the Confirm button to accept a link, committing it to the database.
    • +
    +
  • +
+
+
+
+{% endblock %} + +{% block scripts %} + + +{% endblock %} + +{% block footer %} + +{% endblock %} From 5d22aba27164026dd1fde585f78b808cfe0a8ea8 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Thu, 15 Jan 2026 14:33:04 -0500 Subject: [PATCH 10/32] Dashboard: Drafting finalize page --- .../dashboard/static/finalize/finalize.js | 37 ++++++++++++++++++- .../dashboard/static/styles/finalize.css | 26 +++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/pir_pipeline/dashboard/static/finalize/finalize.js b/src/pir_pipeline/dashboard/static/finalize/finalize.js index 915d762..60139e3 100644 --- a/src/pir_pipeline/dashboard/static/finalize/finalize.js +++ b/src/pir_pipeline/dashboard/static/finalize/finalize.js @@ -27,12 +27,47 @@ function insertFinalizeTables(data) { // https://stackoverflow.com/questions/494143/creating-a-new-dom-element-from-an-html-string-using-built-in-dom-methods-or-pro/35385518#35385518 for (let key in data) { + // Extract HTML + const tableDiv = document.createElement("div"); + tableDiv.className = "finalize-table-div"; + const record = data[key]; const template = document.createElement("template"); template.innerHTML = record["html"]; const tableHTML = template.content.firstChild; + tableHTML.innerHTML = tableHTML.innerHTML.replace("flashcard-question-table", record["id"]) tableHTML.id = record["id"]; - tableBox.appendChild(tableHTML); + + // Remove store buttons + const storeButtons = tableHTML.querySelectorAll("button[onclick='storeLink(event)']") + storeButtons.forEach(element => element.remove()) + + // Add confirm button + const confirmButton = document.createElement("button"); + confirmButton.classList.add(...["wrapper-button", "primary-button"]); + confirmButton.innerHTML = "Confirm"; + confirmButton.setAttribute("onclick", "commitLink(event)"); + confirmButton.name = "confirm"; + confirmButton.value = record["id"]; + + // Add deny button + const denyButton = document.createElement("button"); + denyButton.classList.add(...["wrapper-button", "primary-button", "deny-button"]); + denyButton.innerHTML = "Deny"; + denyButton.setAttribute("onclick", "commitLink(event)"); + denyButton.name = "deny"; + denyButton.value = record["id"]; + + // Create button container + const buttonContainer = document.createElement("div"); + buttonContainer.className = "button-container"; + buttonContainer.appendChild(denyButton); + buttonContainer.appendChild(confirmButton); + + // Render the content + tableDiv.appendChild(tableHTML); + tableDiv.appendChild(buttonContainer); + tableBox.appendChild(tableDiv); }; } diff --git a/src/pir_pipeline/dashboard/static/styles/finalize.css b/src/pir_pipeline/dashboard/static/styles/finalize.css index cc737da..6a234ee 100644 --- a/src/pir_pipeline/dashboard/static/styles/finalize.css +++ b/src/pir_pipeline/dashboard/static/styles/finalize.css @@ -22,4 +22,30 @@ button { #keyword-search { margin-top: 3em; +} + +.table-box { + margin-bottom: 2em; +} + +.finalize-table-div { + display: flex; + flex-direction: column; + margin-bottom: 1em; +} + +.primary-button { + align-self: flex-end; +} + +.deny-button { + background-color: #C41E3A; + margin: 0em 1em 0em 1em; +} +.deny-button:hover { + background-color: #880808; /* Darker blue for hover */ +} + +.button-container { + align-self: flex-end; } \ No newline at end of file From f3826c87562b97a7203cd92e9d05cfedc57f3149 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Thu, 15 Jan 2026 14:33:30 -0500 Subject: [PATCH 11/32] Dashboard: Correct ID sort in review.py --- src/pir_pipeline/dashboard/review.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 3c0aba9..1ff71ae 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -153,8 +153,10 @@ def link(): link_dict = session["link_dict"] ids = [ - record["base_question_id"] + record["match_question_id"] - for record in link_dict.items() + record.get("base_question_id", "") + or "" + record.get("match_question_id") + or "" + for record in link_dict.values() ] ids.sort() @@ -163,14 +165,14 @@ def link(): [ { "id": proposed_id, - "link_dict": json.dumps(link_dict), + "link_dict": list(link_dict.values()), "html": payload["html"], } ], "proposed_changes", ) del session["link_dict"] - + message = f"Record {link_dict} written to proposed changes." # Execute all linking actions elif action == "finalize": db = get_db() From e0c7b404d4bdc1f6bf7e61ae274615364a255592 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Thu, 15 Jan 2026 14:53:12 -0500 Subject: [PATCH 12/32] SQL Models: Flashcard view The unconfirmed view needed to be filtered to exclude items that are in the proposed_changes table. Filtering, however, led to incorrect counts on the home page. To avoid this, created an entirely new view that handles the flashcard page. --- src/pir_pipeline/dashboard/review.py | 2 +- src/pir_pipeline/models/pir_sql_models.py | 47 +++++++++++++++++++++++ src/pir_pipeline/utils/SQLAlchemyUtils.py | 2 + 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 1ff71ae..8f8f91c 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -35,7 +35,7 @@ def get_flashcard_question( dict: Dictionary containing data for header question and matching questions. """ - id_column, record = get_review_question("unconfirmed", offset, "uqid", db) + id_column, record = get_review_question("flashcard", offset, "uqid", db) if not record[id_column]: id_column = "question_id" diff --git a/src/pir_pipeline/models/pir_sql_models.py b/src/pir_pipeline/models/pir_sql_models.py index c765d81..ca94f16 100644 --- a/src/pir_pipeline/models/pir_sql_models.py +++ b/src/pir_pipeline/models/pir_sql_models.py @@ -11,7 +11,10 @@ Table, Text, UniqueConstraint, + and_, + distinct, func, + literal_column, null, or_, select, @@ -112,6 +115,32 @@ Column("html", Text), ) +# Here to proposed_ids definition written with GPT +dictionaries = ( + func.jsonb_array_elements(proposed_changes.c.link_dict) + .table_valued("value") + .alias("dictionaries") +) + +link_dicts = ( + select(dictionaries.c.value.label("value")) + .select_from(proposed_changes) + .join(dictionaries, literal_column("true")) # CROSS JOIN LATERAL + .subquery("link_dicts") +) + +dict_values = ( + func.jsonb_each_text(link_dicts.c.value).table_valued("key", "value").alias("lvl2") +) + +proposed_ids = ( + select(distinct(dict_values.c.value)) + .select_from(link_dicts) + .join(dict_values, literal_column("true")) # CROSS JOIN LATERAL + .where(dict_values.c.key == "base_question_id") + .scalar_subquery() +) + # Confirmed records should be excluded confirmed_subquery = ( select(uqid_changelog.c["original_uqid"]) @@ -148,3 +177,21 @@ ) unconfirmed = view("unconfirmed", sql_metadata, query) + +# Flashcard view +query = ( + select(question) + .where( + or_( + and_( + question.c.uqid.not_in(confirmed_subquery), + question.c.question_id.not_in(proposed_ids), + ), + question.c.uqid == null(), + ) + ) + .distinct() + .order_by(question.c.year, question.c.question_number) +) + +flashcard = view("flashcard", sql_metadata, query) diff --git a/src/pir_pipeline/utils/SQLAlchemyUtils.py b/src/pir_pipeline/utils/SQLAlchemyUtils.py index 5b5de9a..15163cd 100644 --- a/src/pir_pipeline/utils/SQLAlchemyUtils.py +++ b/src/pir_pipeline/utils/SQLAlchemyUtils.py @@ -12,6 +12,7 @@ from pir_pipeline.config import DB_CONFIG from pir_pipeline.models.pir_sql_models import ( confirmed, + flashcard, linked, program, proposed_changes, @@ -73,6 +74,7 @@ def __init__(self, user: str, password: str, host: str, port: int, database: str "unlinked": unlinked, "uqid_changelog": uqid_changelog, "confirmed": confirmed, + "flashcard": flashcard, "unconfirmed": unconfirmed, } self._database = database From 948ca4f5856ffae0ed9a258938a62a839877f0fb Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Sun, 18 Jan 2026 10:32:13 -0500 Subject: [PATCH 13/32] Flashcard: Confirm button handling Because of changes to the underlying view for the flashcard page, a new action is needed to prevent skipping records when paginating. --- src/pir_pipeline/dashboard/static/review/flashcard.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pir_pipeline/dashboard/static/review/flashcard.js b/src/pir_pipeline/dashboard/static/review/flashcard.js index 3480988..5e11eda 100644 --- a/src/pir_pipeline/dashboard/static/review/flashcard.js +++ b/src/pir_pipeline/dashboard/static/review/flashcard.js @@ -86,7 +86,7 @@ async function flashcardAction(e) { }) // Move to the next question - value = "next"; + value = "proposed"; } // Get the data for the next (previous) question and update the tables From 4c0ae04485d41145596f63d4bb73f4184c56658f Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Sun, 18 Jan 2026 10:39:04 -0500 Subject: [PATCH 14/32] Finalize: WIP logic for committing and denying links --- src/pir_pipeline/dashboard/review.py | 30 ++++++++++++++----- .../dashboard/static/finalize/finalize.js | 29 +++++++++++++++++- src/pir_pipeline/utils/dashboard_utils.py | 12 ++++---- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 8f8f91c..5c80bd2 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -5,7 +5,7 @@ from hashlib import sha1 from flask import Blueprint, render_template, request, session -from sqlalchemy import func, select +from sqlalchemy import delete, func, select from pir_pipeline.dashboard.db import get_db from pir_pipeline.utils.dashboard_utils import ( @@ -97,6 +97,10 @@ def flashcard(): output = get_flashcard_question(offset, db, session) + elif action == "proposed": + offset = session.get("current_question") + output = get_flashcard_question(offset, db, session) + return json.dumps(output) return render_template("review/flashcard.html") @@ -129,6 +133,7 @@ def data(): @bp.route("/link", methods=["POST"]) def link(): """Handle storage of link/unlink actions""" + db = get_db() payload = request.get_json() action = payload["action"] # Add a link/unlink entry to session @@ -149,7 +154,6 @@ def link(): session["link_dict"] = link_dict message = f"Data {data} queued for linking" elif action == "store": - db = get_db() link_dict = session["link_dict"] ids = [ @@ -174,11 +178,23 @@ def link(): del session["link_dict"] message = f"Record {link_dict} written to proposed changes." # Execute all linking actions - elif action == "finalize": - db = get_db() - link_dict = session["link_dict"] - QuestionLinker(link_dict, db).update_links() + elif action == "confirm": + proposed_changes = db.tables["proposed_changes"] + link_dict = db.get_scalar( + select(proposed_changes.c["link_dict"]), {"id": payload["id"]} + ) + print(link_dict) + # QuestionLinker(link_dict, db).update_links() message = "Links Updated!" - del session["link_dict"] + elif action == "deny": + proposed_changes = db.tables["proposed_changes"] + delete_query = delete(proposed_changes).where( + proposed_changes.c["id"] == payload["id"] + ) + + with db.engine.begin() as conn: + conn.execute(delete_query) + + message = f"Removed link associated with id: {payload["id"]}" return {"message": message} diff --git a/src/pir_pipeline/dashboard/static/finalize/finalize.js b/src/pir_pipeline/dashboard/static/finalize/finalize.js index 60139e3..056a386 100644 --- a/src/pir_pipeline/dashboard/static/finalize/finalize.js +++ b/src/pir_pipeline/dashboard/static/finalize/finalize.js @@ -95,5 +95,32 @@ function paginate(e) { .then(data => insertFinalizeTables(data)); } +function commitLink(e) { + e.preventDefault(); + + let button = e.target; + + const body = { + "action": button.name, + "id": button.value + } + + const payload = { + "method": "POST", + "headers": { + "Content-type": "application/json" + }, + "body": JSON.stringify(body) + }; + + fetch("/review/link", payload) + .then(response => { + const tableBox = document.querySelector(".table-box"); + tableBox.innerHTML = ""; + buildPage(); + }) +} + document.expandContractRow = expandContractRow; -document.paginate = paginate; \ No newline at end of file +document.paginate = paginate; +document.commitLink = commitLink; \ No newline at end of file diff --git a/src/pir_pipeline/utils/dashboard_utils.py b/src/pir_pipeline/utils/dashboard_utils.py index 453f265..d960ca9 100644 --- a/src/pir_pipeline/utils/dashboard_utils.py +++ b/src/pir_pipeline/utils/dashboard_utils.py @@ -313,14 +313,14 @@ def get_year_range(table: TableClause, _id: tuple[str], db: SQLAlchemyUtils) -> class QuestionLinker: """QuestionLinker class to handle linking and unlinking of questions""" - def __init__(self, data: dict, db: SQLAlchemyUtils, log: bool = True): + def __init__(self, data: list[dict], db: SQLAlchemyUtils, log: bool = True): """QuestionLinker object to handle linking and unlinking of questions Args: data (dict): A series of instructions for linking/unlinking questions. Should be of the form: - { - record_id_1: { + [ + { "link_type": link or unlink, "base_question_id": qid_1, "base_uqid": uqid_1, @@ -328,10 +328,10 @@ def __init__(self, data: dict, db: SQLAlchemyUtils, log: bool = True): "match_uqid": uqid_2 }, ... - record_id_n: { + { ... } - } + ] db (SQLAlchemyUtils): SQLAlchemyUtils object for database interactions log (bool): Boolean to determin whether to write to changelog. Defaults to True. """ @@ -544,7 +544,7 @@ def update_links(self): Makes calles to QuestionLinker.link and QuestionLinker.unlink as needed """ - for key, value in self._data.items(): + for value in self._data: try: self._record = value link_type = value["link_type"] From 2e80dfc9dc8e32a9c942bb4444c7cd78bb1b42b0 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie Date: Tue, 20 Jan 2026 16:14:02 -0500 Subject: [PATCH 15/32] Dashboard-Finalize: Tests for finalize --- src/pir_pipeline/dashboard/finalize.py | 20 ++++--- src/pir_pipeline/models/pir_sql_models.py | 16 +++-- tests/dashboard/test_finalize.py | 73 +++++++++++++++++++++++ 3 files changed, 98 insertions(+), 11 deletions(-) create mode 100644 tests/dashboard/test_finalize.py diff --git a/src/pir_pipeline/dashboard/finalize.py b/src/pir_pipeline/dashboard/finalize.py index 1fc4ddd..169115d 100644 --- a/src/pir_pipeline/dashboard/finalize.py +++ b/src/pir_pipeline/dashboard/finalize.py @@ -10,9 +10,11 @@ bp = Blueprint("finalize", __name__, url_prefix="/finalize") +DEFAULT_DISPLAYED = 10 + class WrappedList(Sequence): - def __init__(self, iterable: Optional[Iterable]): + def __init__(self, iterable: Optional[Iterable], loc: int = 0): if isinstance(iterable, str): self.collection = json.loads(iterable) elif iterable: @@ -20,7 +22,7 @@ def __init__(self, iterable: Optional[Iterable]): else: self.collection = tuple() - self.loc = 0 + self.loc = loc self.max_index = self.__len__() - 1 def __getitem__(self, key): @@ -42,7 +44,8 @@ def previous(self): return self[self.loc] def to_json(self): - return json.dumps(self.collection) + js = [self.collection, self.loc] + return json.dumps(js) def get_page(): @@ -51,7 +54,9 @@ def get_page(): record_count = connection.execute( text("SELECT COUNT(*) FROM proposed_changes") ).scalar_one() - session["max_page"] = ceil(record_count / 10) + session["max_page"] = ceil( + record_count / session.get("number_displayed", DEFAULT_DISPLAYED) + ) return WrappedList(list(range(session["max_page"]))).to_json() @@ -59,7 +64,7 @@ def get_page(): @bp.route("/", methods=["GET"]) def index(): session["finalize_page"] = 0 - session["number_displayed"] = 10 + session["number_displayed"] = session.get("number_displayed", DEFAULT_DISPLAYED) session["page"] = get_page() return render_template("finalize/finalize.html") @@ -69,8 +74,9 @@ def index(): def data(): db = get_db() - number_displayed = session.get("number_displayed") - page: WrappedList = WrappedList(session.get("page", get_page())) + number_displayed: int = session.get("number_displayed", DEFAULT_DISPLAYED) + page_tuple = json.loads(session.get("page", get_page())) + page: WrappedList = WrappedList(*page_tuple) response = request.get_json() direction = response.get("direction") diff --git a/src/pir_pipeline/models/pir_sql_models.py b/src/pir_pipeline/models/pir_sql_models.py index ca94f16..18f3407 100644 --- a/src/pir_pipeline/models/pir_sql_models.py +++ b/src/pir_pipeline/models/pir_sql_models.py @@ -179,15 +179,23 @@ unconfirmed = view("unconfirmed", sql_metadata, query) # Flashcard view +proposed_uqid_query = ( + select(question.c.uqid) + .where(and_(question.c.question_id.in_(proposed_ids), question.c.uqid != null())) + .distinct() + .subquery() +) + query = ( select(question) .where( - or_( - and_( + and_( + or_( question.c.uqid.not_in(confirmed_subquery), - question.c.question_id.not_in(proposed_ids), + question.c.uqid == null(), ), - question.c.uqid == null(), + question.c.question_id.not_in(proposed_ids), + question.c.uqid.not_in(proposed_uqid_query), ) ) .distinct() diff --git a/tests/dashboard/test_finalize.py b/tests/dashboard/test_finalize.py new file mode 100644 index 0000000..c580141 --- /dev/null +++ b/tests/dashboard/test_finalize.py @@ -0,0 +1,73 @@ +import re +import time + +import pytest +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +from sqlalchemy import select + + +@pytest.fixture +def propose_changes(server, driver, sql_utils): + wait = WebDriverWait(driver, 10) + driver.get("http://127.0.0.1:5000/review") + records = sql_utils.get_records(select(sql_utils.tables["flashcard"])) + while not records.empty: + wait.until(EC.presence_of_element_located((By.ID, "flashcard-question-table"))) + confirm_button = driver.find_element(By.ID, "confirm-button") + wait.until(EC.presence_of_element_located((By.ID, "confirm-button"))) + confirm_button.click() + records = sql_utils.get_records(select(sql_utils.tables["flashcard"])) + time.sleep(2) + + +@pytest.mark.usefixtures( + "create_database", "app", "error_message_constructor", "propose_changes" +) +class TestFinalizeRoutes: + def test_get_index(self, client): + response = client.get("/finalize/") + assert ( + response.status_code == 200 + ), f"Expected 200 response from app, instead got {response.status_code}" + + expected_title = "Finalize" + returned_title = re.search(".*<\\/title>", response.text).group(0) + assert ( + returned_title == expected_title + ), f"Response from finalize did not return expected result, {expected_title}. Instead got {returned_title}." + + def test_post_data(self, client, driver): + with client.session_transaction() as sess: + sess["number_displayed"] = 5 + + with client: + # Test if direction is none + response = client.post("/finalize/data", json={}) + data = response.json + qid = data["0"]["link_dict"][0]["base_question_id"] + assert qid == "5f50241087df4b86810c044c4777566f50ae7453" + assert len(data.keys()) == 5 + # Test if direction is next + response = client.post("/finalize/data", json={"direction": "next"}) + data = response.json + qid = data["0"]["link_dict"][0]["base_question_id"] + assert qid == "d27e8217ba30000a78e5d92ea54f4d9a2e69cb54" + assert len(data.keys()) == 1 + # Test if direction is previous + response = client.post("/finalize/data", json={"direction": "previous"}) + data = response.json + qid = data["0"]["link_dict"][0]["base_question_id"] + assert qid == "5f50241087df4b86810c044c4777566f50ae7453" + assert len(data.keys()) == 5 + # Test if direction is none + response = client.post("/finalize/data", json={"direction": "previous"}) + data = response.json + qid = data["0"]["link_dict"][0]["base_question_id"] + assert qid == "d27e8217ba30000a78e5d92ea54f4d9a2e69cb54" + assert len(data.keys()) == 1 + + +if __name__ == "__main__": + pytest.main([__file__, "-sk", "test_post_data"]) From e5e0d96fbbdd5a02f030e37cefb60e03fc731f83 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Wed, 21 Jan 2026 16:30:20 -0500 Subject: [PATCH 16/32] Dashboard: Tests for review.link Test that review/link/deny and review/link/confirm work as expected --- src/pir_pipeline/dashboard/review.py | 9 ++- tests/dashboard/test_review.py | 88 +++++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 5c80bd2..88f64da 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -183,8 +183,13 @@ def link(): link_dict = db.get_scalar( select(proposed_changes.c["link_dict"]), {"id": payload["id"]} ) - print(link_dict) - # QuestionLinker(link_dict, db).update_links() + QuestionLinker(link_dict, db).update_links() + delete_query = delete(proposed_changes).where( + proposed_changes.c["id"] == payload["id"] + ) + with db.engine.begin() as conn: + conn.execute(delete_query) + message = "Links Updated!" elif action == "deny": proposed_changes = db.tables["proposed_changes"] diff --git a/tests/dashboard/test_review.py b/tests/dashboard/test_review.py index f3d6703..89ffb3c 100644 --- a/tests/dashboard/test_review.py +++ b/tests/dashboard/test_review.py @@ -75,7 +75,7 @@ def test_post_data(self, client, error_message_constructor): "Incorrect question_name", expected_name, actual_name ) - def test_post_link(self, client): + def test_post_link(self, client, sql_utils, error_message_constructor): with client: client.post( "/review/link", @@ -85,6 +85,90 @@ def test_post_link(self, client): "base_question_id": "B" } + input_records = [ + { + "0": { + "link_type": "confirm", + "base_question_id": "B", + "match_question_id": "", + } + }, + { + "1": { + "link_type": "confirm", + "base_question_id": "A", + "match_question_id": "", + } + }, + ] + for record in input_records: + with client.session_transaction() as sess: + sess["link_dict"] = record + + client.post( + "/review/link", + json={ + "action": "store", + "html": "", + }, + ) + + with sql_utils.engine.connect() as conn: + result = conn.execute(select(sql_utils.tables["proposed_changes"])) + records = result.all() + + expected = 2 + got = len(records) + assert expected == got, error_message_constructor( + "Incorrect number of records", expected, got + ) + + expected = [input_records[0]["0"]] + got = records[0][1] + assert expected == got, error_message_constructor( + "Incorrect link dict", expected, got + ) + + deny_id = records[0][0] + client.post("/review/link", json={"action": "deny", "id": deny_id}) + + with sql_utils.engine.connect() as conn: + result = conn.execute(select(sql_utils.tables["proposed_changes"])) + records = result.all() + + expected = 1 + got = len(records) + assert expected == got, error_message_constructor( + "Incorrect number of records", expected, got + ) + + confirm_id = records[0][0] + client.post("/review/link", json={"action": "confirm", "id": confirm_id}) + + with sql_utils.engine.connect() as conn: + result = conn.execute(select(sql_utils.tables["proposed_changes"])) + proposed_records = result.all() + result = conn.execute(select(sql_utils.tables["uqid_changelog"])) + confirmed_records = result.all() + + expected = 0 + got = len(proposed_records) + assert expected == got, error_message_constructor( + "Incorrect number of records", expected, got + ) + + expected = 1 + got = len(confirmed_records) + assert expected == got, error_message_constructor( + "Incorrect number of records", expected, got + ) + + expected = input_records[1]["1"]["base_question_id"] + got = confirmed_records[0][2] + assert expected == got, error_message_constructor( + "Incorrect record in db", expected, got + ) + if __name__ == "__main__": - pytest.main([__file__, "-sk", "test_post_finalize"]) + pytest.main([__file__, "-sk", "test_post_link"]) From d3be58109eeaa4d2afe0a30849e29218d54b0d8b Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Wed, 21 Jan 2026 16:35:51 -0500 Subject: [PATCH 17/32] PIRLinker: Fix == to is --- src/pir_pipeline/linking/PIRLinker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pir_pipeline/linking/PIRLinker.py b/src/pir_pipeline/linking/PIRLinker.py index 015dd23..8194787 100644 --- a/src/pir_pipeline/linking/PIRLinker.py +++ b/src/pir_pipeline/linking/PIRLinker.py @@ -152,7 +152,7 @@ def consolidate_uqids(self) -> Self: self._question.groupby(columns + ["year"]).size() == 1 ).reset_index() unique_records = unique_records.groupby(columns).min().reset_index() - unique_records = unique_records[unique_records[0] == True] + unique_records = unique_records[unique_records[0] is True] unique_records = unique_records[columns].groupby(columns).first().reset_index() # Get the modal uqid From 09c9fa95f86e6f010715cc20803823ad466fa873 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 22 Jan 2026 11:31:18 -0500 Subject: [PATCH 18/32] Tests.dashboard.finalize: Remove driver usage to allow running in cloud --- tests/dashboard/test_finalize.py | 67 +++++++++++++++++++------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/tests/dashboard/test_finalize.py b/tests/dashboard/test_finalize.py index c580141..7cf4533 100644 --- a/tests/dashboard/test_finalize.py +++ b/tests/dashboard/test_finalize.py @@ -1,29 +1,45 @@ import re -import time import pytest -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait -from sqlalchemy import select -@pytest.fixture -def propose_changes(server, driver, sql_utils): - wait = WebDriverWait(driver, 10) - driver.get("http://127.0.0.1:5000/review") - records = sql_utils.get_records(select(sql_utils.tables["flashcard"])) - while not records.empty: - wait.until(EC.presence_of_element_located((By.ID, "flashcard-question-table"))) - confirm_button = driver.find_element(By.ID, "confirm-button") - wait.until(EC.presence_of_element_located((By.ID, "confirm-button"))) - confirm_button.click() - records = sql_utils.get_records(select(sql_utils.tables["flashcard"])) - time.sleep(2) +@pytest.fixture(scope="class") +def insert_records(client): + input_records = [ + { + f"{i}": { + "link_type": "confirm", + "base_question_id": f"{i}", + "match_question_id": "", + } + } + for i in range(6) + ] + for record in input_records: + with client.session_transaction() as sess: + sess["link_dict"] = record + + client.post( + "/review/link", + json={ + "action": "store", + "html": "", + }, + ) + + +@pytest.fixture(scope="class") +def set_number_displayed(client): + with client.session_transaction() as sess: + sess["number_displayed"] = 5 @pytest.mark.usefixtures( - "create_database", "app", "error_message_constructor", "propose_changes" + "create_database", + "app", + "error_message_constructor", + "insert_records", + "set_number_displayed", ) class TestFinalizeRoutes: def test_get_index(self, client): @@ -38,34 +54,31 @@ def test_get_index(self, client): returned_title == expected_title ), f"Response from finalize did not return expected result, {expected_title}. Instead got {returned_title}." - def test_post_data(self, client, driver): - with client.session_transaction() as sess: - sess["number_displayed"] = 5 - + def test_post_data(self, client): with client: # Test if direction is none response = client.post("/finalize/data", json={}) data = response.json qid = data["0"]["link_dict"][0]["base_question_id"] - assert qid == "5f50241087df4b86810c044c4777566f50ae7453" + assert qid == "0" assert len(data.keys()) == 5 # Test if direction is next response = client.post("/finalize/data", json={"direction": "next"}) data = response.json qid = data["0"]["link_dict"][0]["base_question_id"] - assert qid == "d27e8217ba30000a78e5d92ea54f4d9a2e69cb54" + assert qid == "5" assert len(data.keys()) == 1 # Test if direction is previous response = client.post("/finalize/data", json={"direction": "previous"}) data = response.json qid = data["0"]["link_dict"][0]["base_question_id"] - assert qid == "5f50241087df4b86810c044c4777566f50ae7453" + assert qid == "0" assert len(data.keys()) == 5 - # Test if direction is none + # Test wrapping response = client.post("/finalize/data", json={"direction": "previous"}) data = response.json qid = data["0"]["link_dict"][0]["base_question_id"] - assert qid == "d27e8217ba30000a78e5d92ea54f4d9a2e69cb54" + assert qid == "5" assert len(data.keys()) == 1 From a341175b1ea3e169960ded9351e9eb753f5e838b Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 22 Jan 2026 11:31:52 -0500 Subject: [PATCH 19/32] Finalize: Add number displayed dropdown --- src/pir_pipeline/dashboard/finalize.py | 7 ++++++- .../dashboard/static/finalize/finalize.js | 9 +++++++-- .../dashboard/templates/finalize/finalize.html | 11 +++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/pir_pipeline/dashboard/finalize.py b/src/pir_pipeline/dashboard/finalize.py index 169115d..fc23928 100644 --- a/src/pir_pipeline/dashboard/finalize.py +++ b/src/pir_pipeline/dashboard/finalize.py @@ -79,10 +79,15 @@ def data(): page: WrappedList = WrappedList(*page_tuple) response = request.get_json() - direction = response.get("direction") + direction: str = response.get("direction") if direction is None: pass + elif direction.isdigit(): + number_displayed = int(direction) + session["number_displayed"] = number_displayed + page_tuple = json.loads(get_page()) + page = WrappedList(*page_tuple) elif direction == "next": page.next() else: diff --git a/src/pir_pipeline/dashboard/static/finalize/finalize.js b/src/pir_pipeline/dashboard/static/finalize/finalize.js index 056a386..02cfb05 100644 --- a/src/pir_pipeline/dashboard/static/finalize/finalize.js +++ b/src/pir_pipeline/dashboard/static/finalize/finalize.js @@ -75,8 +75,13 @@ function paginate(e) { e.preventDefault(); // Get the form data - const name = e.submitter.name; - let value = e.submitter.value; + try { + var name = e.submitter.name; + var value = e.submitter.value; + } catch { + var name = e.target.name; + var value = e.target.value; + } const body = { "direction": value diff --git a/src/pir_pipeline/dashboard/templates/finalize/finalize.html b/src/pir_pipeline/dashboard/templates/finalize/finalize.html index 69b7c40..2d9c1a6 100644 --- a/src/pir_pipeline/dashboard/templates/finalize/finalize.html +++ b/src/pir_pipeline/dashboard/templates/finalize/finalize.html @@ -19,6 +19,17 @@ <h3>Finalize Changes</h3> </li> </ul> </div> + <div class="number-displayed"> + <label for="number-displayed"> + Number of Records to Display: + </label> + <select name="number-displayed" id="number-displayed" onchange="paginate(event)"> + <option value=5>5</option> + <option value=10 selected="selected">10</option> + <option value=25>25</option> + <option value=50>50</option> + </select> + </div> <div class="table-box"> </div> {% endblock %} From ee179dc985a8f5e840cbf594ee2912ded5009c9f Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 22 Jan 2026 11:34:10 -0500 Subject: [PATCH 20/32] Tests.dashboard.finalize_ui: Tests for finalize ui --- tests/dashboard/test_finalize_ui.py | 113 ++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/dashboard/test_finalize_ui.py diff --git a/tests/dashboard/test_finalize_ui.py b/tests/dashboard/test_finalize_ui.py new file mode 100644 index 0000000..ce7813e --- /dev/null +++ b/tests/dashboard/test_finalize_ui.py @@ -0,0 +1,113 @@ +import os +import time + +import pytest +from selenium.webdriver import Firefox +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select, WebDriverWait +from sqlalchemy import select + +from pir_pipeline.utils.SQLAlchemyUtils import SQLAlchemyUtils + + +@pytest.fixture +def propose_changes(server, driver, sql_utils): + wait = WebDriverWait(driver, 10) + driver.get("http://127.0.0.1:5000/review") + records = sql_utils.get_records(select(sql_utils.tables["flashcard"])) + while not records.empty: + wait.until(EC.presence_of_element_located((By.ID, "flashcard-question-table"))) + confirm_button = driver.find_element(By.ID, "confirm-button") + wait.until(EC.presence_of_element_located((By.ID, "confirm-button"))) + confirm_button.click() + records = sql_utils.get_records(select(sql_utils.tables["flashcard"])) + time.sleep(1) + + +@pytest.mark.skipif( + bool(os.getenv("ON_RUNNER")), reason="Test does not run on GitHub runner" +) +@pytest.mark.usefixtures( + "create_database", "insert_question_records", "server", "propose_changes" +) +class TestFinalizeUI: + def test_pagination(self, driver: Firefox, error_message_constructor, client): + driver.get("http://127.0.0.1:5000/finalize") + number_displayed = driver.find_element(By.ID, "number-displayed") + Select(number_displayed).select_by_value("5") + wait = WebDriverWait(driver, 10) + # Go to next page and confirm 1 record + next_button = driver.find_element(By.ID, "next-button") + next_button.click() + wait.until(EC.presence_of_element_located((By.TAG_NAME, "table"))) + tables = driver.find_elements(By.TAG_NAME, "table") + expected = 1 + got = len(tables) + assert expected == got, error_message_constructor( + "Incorrect number of tables", expected, got + ) + + # Go to next page and confirm 5 records + next_button = driver.find_element(By.ID, "next-button") + next_button.click() + wait.until(EC.presence_of_element_located((By.TAG_NAME, "table"))) + tables = driver.find_elements(By.TAG_NAME, "table") + expected = 5 + got = len(tables) + assert expected == got, error_message_constructor( + "Incorrect number of tables", expected, got + ) + + # Go to previous page and confirm 1 record + previous_button = driver.find_element(By.ID, "previous-button") + previous_button.click() + wait.until(EC.presence_of_element_located((By.TAG_NAME, "table"))) + tables = driver.find_elements(By.TAG_NAME, "table") + expected = 1 + got = len(tables) + assert expected == got, error_message_constructor( + "Incorrect number of tables", expected, got + ) + + def test_action_buttons( + self, driver: Firefox, sql_utils: SQLAlchemyUtils, error_message_constructor + ): + driver.get("http://127.0.0.1:5000/finalize") + wait = WebDriverWait(driver, 10) + + wait.until(EC.presence_of_element_located((By.CLASS_NAME, "deny-button"))) + deny_button = driver.find_element(By.CLASS_NAME, "deny-button") + deny_button.click() + + proposed_changes_query = select(sql_utils.tables["proposed_changes"]) + + with sql_utils.engine.connect() as conn: + response = conn.execute(proposed_changes_query) + proposed_changes = response.all() + + expected = 5 + got = len(proposed_changes) + assert expected == got, error_message_constructor( + "Incorrect number of records", expected, got + ) + + wait.until( + EC.presence_of_element_located((By.CSS_SELECTOR, "button[name='confirm']")) + ) + confirm_button = driver.find_element(By.CSS_SELECTOR, "button[name='confirm']") + confirm_button.click() + + with sql_utils.engine.connect() as conn: + response = conn.execute(proposed_changes_query) + proposed_changes = response.all() + + expected = 4 + got = len(proposed_changes) + assert expected == got, error_message_constructor( + "Incorrect number of records", expected, got + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-sk", "test_action_buttons"]) From b8496a48b6d971cee67239506f0f7cf32c4729aa Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 22 Jan 2026 13:03:49 -0500 Subject: [PATCH 21/32] Dashboard: Ensure correct config is used for dashboard --- code/serve_qa_dashboard.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/code/serve_qa_dashboard.sh b/code/serve_qa_dashboard.sh index fc02cab..ea59f4b 100755 --- a/code/serve_qa_dashboard.sh +++ b/code/serve_qa_dashboard.sh @@ -1,4 +1,5 @@ cd ~/repos/ACF-pir-data/ source .venv/bin/activate +export LOADING_DASHBOARD="True" google-chrome http://localhost:8080 &>/dev/null & waitress-serve --host 127.0.0.1 --call "pir_pipeline.dashboard:create_app" \ No newline at end of file From a1a66374203b4b25394081d6125b7aaadc7302d5 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 22 Jan 2026 15:13:18 -0500 Subject: [PATCH 22/32] PIRLinker: Revert to == from IS. Note noqa --- src/pir_pipeline/linking/PIRLinker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pir_pipeline/linking/PIRLinker.py b/src/pir_pipeline/linking/PIRLinker.py index 8194787..71351e6 100644 --- a/src/pir_pipeline/linking/PIRLinker.py +++ b/src/pir_pipeline/linking/PIRLinker.py @@ -152,7 +152,7 @@ def consolidate_uqids(self) -> Self: self._question.groupby(columns + ["year"]).size() == 1 ).reset_index() unique_records = unique_records.groupby(columns).min().reset_index() - unique_records = unique_records[unique_records[0] is True] + unique_records = unique_records[unique_records[0] == True] # noqa: E712 unique_records = unique_records[columns].groupby(columns).first().reset_index() # Get the modal uqid From ad9f7c072c421582ec35a349a12417d1beda6ef8 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 22 Jan 2026 16:58:24 -0500 Subject: [PATCH 23/32] Dashboard: Margin on number-displayed --- src/pir_pipeline/dashboard/static/styles/finalize.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pir_pipeline/dashboard/static/styles/finalize.css b/src/pir_pipeline/dashboard/static/styles/finalize.css index 6a234ee..559f99b 100644 --- a/src/pir_pipeline/dashboard/static/styles/finalize.css +++ b/src/pir_pipeline/dashboard/static/styles/finalize.css @@ -48,4 +48,8 @@ button { .button-container { align-self: flex-end; +} + +.number-displayed { + margin-bottom: 1em; } \ No newline at end of file From 1aa8437f0c3b1d28dd448224ff35f67114fc79ba Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Fri, 23 Jan 2026 09:54:20 -0500 Subject: [PATCH 24/32] Dashboard.finalize: Fix number displayed --- src/pir_pipeline/dashboard/finalize.py | 46 +++++++++++++------ .../templates/finalize/finalize.html | 9 ++-- tests/dashboard/test_finalize_ui.py | 2 +- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/pir_pipeline/dashboard/finalize.py b/src/pir_pipeline/dashboard/finalize.py index fc23928..2d203cb 100644 --- a/src/pir_pipeline/dashboard/finalize.py +++ b/src/pir_pipeline/dashboard/finalize.py @@ -14,7 +14,7 @@ class WrappedList(Sequence): - def __init__(self, iterable: Optional[Iterable], loc: int = 0): + def __init__(self, iterable: Optional[Iterable | str], loc: int = 0): if isinstance(iterable, str): self.collection = json.loads(iterable) elif iterable: @@ -31,6 +31,10 @@ def __getitem__(self, key): def __len__(self): return self.collection.__len__() + def __repr__(self): + string = f"""Pages: {self.collection};\nCurrent Page: {self.loc}""" + return string + @property def current(self): return self[self.loc] @@ -48,24 +52,38 @@ def to_json(self): return json.dumps(js) -def get_page(): - db = get_db() - with db.engine.connect() as connection: - record_count = connection.execute( - text("SELECT COUNT(*) FROM proposed_changes") - ).scalar_one() - session["max_page"] = ceil( - record_count / session.get("number_displayed", DEFAULT_DISPLAYED) +def get_page(number_displayed: Optional[int] = None) -> WrappedList: + if session.get("page") and not number_displayed: + page_tuple = json.loads(session.get("page")) + page_wrapped = WrappedList(*page_tuple) + else: + db = get_db() + denominator = number_displayed or session.get( + "number_displayed", DEFAULT_DISPLAYED ) + loc = 0 + with db.engine.connect() as connection: + record_count = connection.execute( + text("SELECT COUNT(*) FROM proposed_changes") + ).scalar_one() + session["max_page"] = ceil(record_count / denominator) + if session.get("page") and number_displayed: + previous_page = json.loads(session.get("page"))[1] + offset = previous_page * session.get("number_displayed", DEFAULT_DISPLAYED) + loc = offset // number_displayed + + page_range = range(session["max_page"]) + page_list = list(page_range) + page_wrapped = WrappedList(page_list, loc) - return WrappedList(list(range(session["max_page"]))).to_json() + return page_wrapped @bp.route("/", methods=["GET"]) def index(): session["finalize_page"] = 0 session["number_displayed"] = session.get("number_displayed", DEFAULT_DISPLAYED) - session["page"] = get_page() + session["page"] = get_page().to_json() return render_template("finalize/finalize.html") @@ -75,8 +93,7 @@ def data(): db = get_db() number_displayed: int = session.get("number_displayed", DEFAULT_DISPLAYED) - page_tuple = json.loads(session.get("page", get_page())) - page: WrappedList = WrappedList(*page_tuple) + page: WrappedList = get_page() response = request.get_json() direction: str = response.get("direction") @@ -85,9 +102,8 @@ def data(): pass elif direction.isdigit(): number_displayed = int(direction) + page = get_page(number_displayed) session["number_displayed"] = number_displayed - page_tuple = json.loads(get_page()) - page = WrappedList(*page_tuple) elif direction == "next": page.next() else: diff --git a/src/pir_pipeline/dashboard/templates/finalize/finalize.html b/src/pir_pipeline/dashboard/templates/finalize/finalize.html index 2d9c1a6..ffeba3c 100644 --- a/src/pir_pipeline/dashboard/templates/finalize/finalize.html +++ b/src/pir_pipeline/dashboard/templates/finalize/finalize.html @@ -23,11 +23,12 @@ <h3>Finalize Changes</h3> <label for="number-displayed"> Number of Records to Display: </label> + <!-- Written with GPT --> <select name="number-displayed" id="number-displayed" onchange="paginate(event)"> - <option value=5>5</option> - <option value=10 selected="selected">10</option> - <option value=25>25</option> - <option value=50>50</option> + <option value="5" {% if session["number_displayed"] == 5 %}selected{% endif %}>5</option> + <option value="10" {% if session["number_displayed"] == 10 %}selected{% endif %}>10</option> + <option value="25" {% if session["number_displayed"] == 25 %}selected{% endif %}>25</option> + <option value="50" {% if session["number_displayed"] == 50 %}selected{% endif %}>50</option> </select> </div> <div class="table-box"> diff --git a/tests/dashboard/test_finalize_ui.py b/tests/dashboard/test_finalize_ui.py index ce7813e..27605ea 100644 --- a/tests/dashboard/test_finalize_ui.py +++ b/tests/dashboard/test_finalize_ui.py @@ -110,4 +110,4 @@ def test_action_buttons( if __name__ == "__main__": - pytest.main([__file__, "-sk", "test_action_buttons"]) + pytest.main([__file__, "-sk", "test_pagination"]) From ccbe9f9295d44262e6978f8fb57158482726ff63 Mon Sep 17 00:00:00 2001 From: RGilliard-ACF <238555752+RGilliard-ACF@users.noreply.github.com> Date: Sun, 25 Jan 2026 16:45:24 +0000 Subject: [PATCH 25/32] Code formatted with black --- code/uqid_bug_correction.py | 12 ++++-------- src/pir_pipeline/utils/SQLAlchemyUtils.py | 6 ++---- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/code/uqid_bug_correction.py b/code/uqid_bug_correction.py index fdc61f8..52631ca 100644 --- a/code/uqid_bug_correction.py +++ b/code/uqid_bug_correction.py @@ -155,13 +155,11 @@ def post_correction_checks( def add_unique_constraint(): - constraint_query = text( - """ + constraint_query = text(""" ALTER TABLE question ADD CONSTRAINT uq_question_uqid UNIQUE (uqid, year) - """ - ) + """) with SQL_UTILS.engine.begin() as conn: conn.execute(constraint_query) @@ -175,15 +173,13 @@ def revert_or_drop_temp_table(revert: bool = False): for column in QUESTION.c: if not column.primary_key: update_string.append(f"{column.name} = EXCLUDED.{column.name}") - query = text( - f""" + query = text(f""" INSERT INTO question SELECT * FROM question_temp ON CONFLICT ON CONSTRAINT pk_question DO UPDATE SET {', '.join(update_string)} - """ - ) + """) queries.insert(0, query) # Remove unique constraint. diff --git a/src/pir_pipeline/utils/SQLAlchemyUtils.py b/src/pir_pipeline/utils/SQLAlchemyUtils.py index 15163cd..4bf972a 100644 --- a/src/pir_pipeline/utils/SQLAlchemyUtils.py +++ b/src/pir_pipeline/utils/SQLAlchemyUtils.py @@ -169,13 +169,11 @@ def get_columns(self, table: str, where: str = "") -> list[str]: elif self._dialect == "postgresql": table_schema = "table_catalog" - query = text( - f""" + query = text(f""" SELECT column_name FROM information_schema.columns WHERE table_name = :table AND {table_schema} = :schema {where} - """ - ) + """) with self._engine.connect() as conn: result = conn.execute( query, {"table": table, "schema": self._database, "where": where} From 7f19eba9166464fd010ffb74559dcf1a3cbf89de Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Mon, 26 Jan 2026 09:48:35 -0500 Subject: [PATCH 26/32] Bug Correction: Adjust imports --- code/uqid_bug_correction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/uqid_bug_correction.py b/code/uqid_bug_correction.py index fdc61f8..7421403 100644 --- a/code/uqid_bug_correction.py +++ b/code/uqid_bug_correction.py @@ -2,14 +2,14 @@ from typing import MutableSequence from pandas import DataFrame -from sqlalchemy import func, or_, select, text, Select, bindparam +from sqlalchemy import bindparam, func, or_, select, text os.environ["RDS_CREDENTIALS"] = "True" from pir_pipeline.config import DB_CONFIG from pir_pipeline.linking.PIRLinker import PIRLinker -from pir_pipeline.utils.SQLAlchemyUtils import SQLAlchemyUtils from pir_pipeline.utils.dashboard_utils import QuestionLinker +from pir_pipeline.utils.SQLAlchemyUtils import SQLAlchemyUtils SQL_UTILS = SQLAlchemyUtils(**DB_CONFIG, database="pir_data") QUESTION = SQL_UTILS.tables["question"] From 7c2385c9d45fdabe70b7dcbd68a33cdcfe5eca8e Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Mon, 26 Jan 2026 13:34:39 -0500 Subject: [PATCH 27/32] Dashboard.index: Remove .create_db --- src/pir_pipeline/dashboard/index.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pir_pipeline/dashboard/index.py b/src/pir_pipeline/dashboard/index.py index d9c5134..6396015 100644 --- a/src/pir_pipeline/dashboard/index.py +++ b/src/pir_pipeline/dashboard/index.py @@ -13,7 +13,6 @@ def index(): """Return the Home page""" db = get_db() - db.create_db() # Count of questions by year question = db.tables["question"] From f2e19032a7eb4918e97aa57341759d4f572149d3 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Mon, 26 Jan 2026 13:41:29 -0500 Subject: [PATCH 28/32] Adding alembic to the dependencies for future DB management --- alembic.ini | 44 +++++++++++++++++++++ alembic/README | 1 + alembic/env.py | 83 +++++++++++++++++++++++++++++++++++++++ alembic/script.py.mako | 28 +++++++++++++ pyproject.toml | 89 +++++++++++++++++++++++++++++++++++++++++- requirements.txt | 2 +- 6 files changed, 244 insertions(+), 3 deletions(-) create mode 100644 alembic.ini create mode 100644 alembic/README create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..3d10f0e --- /dev/null +++ b/alembic.ini @@ -0,0 +1,44 @@ +# A generic, single database configuration. + +[alembic] + +# database URL. This is consumed by the user-maintained env.py script only. +# other means of configuring database URLs may be customized within the env.py +# file. +sqlalchemy.url = driver://user:pass@localhost/dbname + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000..fdacc05 --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +pyproject configuration, based on the generic configuration. \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..be75957 --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,83 @@ +from logging.config import fileConfig + +from sqlalchemy import engine_from_config, pool + +from alembic import context +from pir_pipeline.config import DB_CONFIG, DB_NAME +from pir_pipeline.models.pir_sql_models import sql_metadata +from pir_pipeline.utils.SQLAlchemyUtils import SQLAlchemyUtils + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = sql_metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. +sql_utils = SQLAlchemyUtils(**DB_CONFIG, database=DB_NAME) +config.set_main_option( + "sqlalchemy.url", + sql_utils._engine.url.__to_string__(hide_password=False).replace("%", "%%"), +) + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..1101630 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/pyproject.toml b/pyproject.toml index 9262530..1dc9562 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "mysql-connector-python", "sphinx", "pandas", "python-dotenv", "fuzzywuzzy", "python-Levenshtein", "pytest", "openpyxl", "xlrd", "pydantic", "Flask", "SQLAlchemy", "sqlalchemy-utils", "psycopg[binary]", - "requests", "selenium", "coverage", "boto3", "waitress" + "requests", "selenium", "coverage", "boto3", "waitress", "alembic" ] [build-system] @@ -27,4 +27,89 @@ build-backend = "setuptools.build_meta" Homepage = "https://github.com/HHS/ACF-pir-data" [tool.pytest.ini_options] -testpaths = ["tests"] \ No newline at end of file +testpaths = ["tests"] + +[tool.alembic] + +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = "%(here)s/alembic" + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = "%%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s" +# Or organize into date-based subdirectories (requires recursive_version_locations = true) +# file_template = "%%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s" + +# additional paths to be prepended to sys.path. defaults to the current working directory. +prepend_sys_path = [ + "." +] + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the tzdata library which can be installed by adding +# `alembic[tz]` to the pip requirements. +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to <script_location>/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# version_locations = [ +# "%(here)s/alembic/versions", +# "%(here)s/foo/bar" +# ] + + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = "utf-8" + +# This section defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples +# [[tool.alembic.post_write_hooks]] +# format using "black" - use the console_scripts runner, +# against the "black" entrypoint +# name = "black" +# type = "console_scripts" +# entrypoint = "black" +# options = "-l 79 REVISION_SCRIPT_FILENAME" +# +# [[tool.alembic.post_write_hooks]] +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# name = "ruff" +# type = "module" +# module = "ruff" +# options = "check --fix REVISION_SCRIPT_FILENAME" +# +# [[tool.alembic.post_write_hooks]] +# Alternatively, use the exec runner to execute a binary found on your PATH +# name = "ruff" +# type = "exec" +# executable = "ruff" +# options = "check --fix REVISION_SCRIPT_FILENAME" + diff --git a/requirements.txt b/requirements.txt index 21602cf..3a99cda 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ openpyxl==3.1.5 outcome==1.3.0.post0 packaging==25.0 pandas==2.2.3 --e git+https://github.com/HHS/ACF-pir-data.git@0104f01060b03cdcbef74e0f8008782c8059cd32#egg=pir_pipeline +-e git+https://github.com/HHS/ACF-pir-data.git@d1bbee31e9490b6b089a91ffaed915bd9eff2364#egg=pir_pipeline pluggy==1.5.0 psycopg==3.2.7 psycopg-binary==3.2.7 From 966c0568366388f352b77a424ffac19a86560f7a Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 29 Jan 2026 09:49:36 -0500 Subject: [PATCH 29/32] Dashboard: Provide HTML to review/link from search page --- src/pir_pipeline/dashboard/static/search/search.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pir_pipeline/dashboard/static/search/search.js b/src/pir_pipeline/dashboard/static/search/search.js index 332216f..6dd0026 100644 --- a/src/pir_pipeline/dashboard/static/search/search.js +++ b/src/pir_pipeline/dashboard/static/search/search.js @@ -79,10 +79,12 @@ function getFlashcardData(e) { function commitChanges(e) { e.preventDefault(); + const questionTable = document.getElementById("flashcard-question-table"); + // Commit changes to the database const payload = { "action": "store", - "data": "" + "html": questionTable.outerHTML } fetch("/review/link", { From a7426b2ffa6153ff115db7f7716e832fb915d1a0 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 29 Jan 2026 09:50:08 -0500 Subject: [PATCH 30/32] Dashboard.review: Correct link_dict --- src/pir_pipeline/dashboard/review.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pir_pipeline/dashboard/review.py b/src/pir_pipeline/dashboard/review.py index 88f64da..e439b6e 100644 --- a/src/pir_pipeline/dashboard/review.py +++ b/src/pir_pipeline/dashboard/review.py @@ -5,7 +5,7 @@ from hashlib import sha1 from flask import Blueprint, render_template, request, session -from sqlalchemy import delete, func, select +from sqlalchemy import bindparam, delete, func, select from pir_pipeline.dashboard.db import get_db from pir_pipeline.utils.dashboard_utils import ( @@ -180,9 +180,10 @@ def link(): # Execute all linking actions elif action == "confirm": proposed_changes = db.tables["proposed_changes"] - link_dict = db.get_scalar( - select(proposed_changes.c["link_dict"]), {"id": payload["id"]} + link_dict_query = select(proposed_changes.c["link_dict"]).where( + proposed_changes.c["id"] == bindparam("id") ) + link_dict = db.get_scalar(link_dict_query, {"id": payload["id"]}) QuestionLinker(link_dict, db).update_links() delete_query = delete(proposed_changes).where( proposed_changes.c["id"] == payload["id"] From ff87c42a496e3373e1cfff5220ba1c0f8b8df8e1 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 29 Jan 2026 09:50:42 -0500 Subject: [PATCH 31/32] dashboard_utils: Correct QuestionLinker logic --- src/pir_pipeline/utils/dashboard_utils.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/pir_pipeline/utils/dashboard_utils.py b/src/pir_pipeline/utils/dashboard_utils.py index d960ca9..3334954 100644 --- a/src/pir_pipeline/utils/dashboard_utils.py +++ b/src/pir_pipeline/utils/dashboard_utils.py @@ -409,10 +409,19 @@ def link(self): ) # Matching two questions with one or no uqid - elif not base_uqid: - if not match_uqid: + else: + if not match_uqid and not base_uqid: qids_encoded = (base_qid + match_qid).encode("utf-8") match_uqid = sha1(qids_encoded).hexdigest() + elif base_uqid or match_uqid: + if match_uqid: + pass + elif base_uqid: + match_uqid = base_uqid + base_qid = match_qid + + changes.base["new_uqid"] = match_uqid + changes.match["new_uqid"] = match_uqid self._db.update_records( question, From c054a469f1d21f69925ca617db6ad47359140497 Mon Sep 17 00:00:00 2001 From: Gilliard Reggie <reggie.gilliard@ohs-dv-adm-rg-l.acf.hhs.local> Date: Thu, 29 Jan 2026 09:50:55 -0500 Subject: [PATCH 32/32] Test fixes --- tests/conftest.py | 14 +++++++------- tests/dashboard/test_review_ui.py | 4 +--- tests/dashboard/test_search_ui.py | 4 +--- tests/utils/test_dashboard_utils.py | 10 +++++----- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 471b93e..465075a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -378,42 +378,42 @@ def question_linker_records(): @pytest.fixture def question_linker_payload(): - payload = { - "1": { + payload = [ + { "link_type": "unlink", "base_question_id": "443a354c772a24df0c2bba9acf568576a3b7d182", "base_uqid": "6c9f2d2dcd01048e5163e12a1f50cb8e7c926e43", "match_question_id": "d27e8217ba30000a78e5d92ea54f4d9a2e69cb54", "match_uqid": "6c9f2d2dcd01048e5163e12a1f50cb8e7c926e43", }, - "2": { + { "link_type": "unlink", "base_question_id": "b5c2dd4e8fe4523405cfcd2753da583d669db2af", "base_uqid": "e358fa167d3a9d5872f563ec5cfa07898a7ee186", "match_question_id": "dd089009542dfd51e45f2f23826dbc7a6f6a912e", "match_uqid": "e358fa167d3a9d5872f563ec5cfa07898a7ee186", }, - "3": { + { "link_type": "link", "base_question_id": "8e96da390b28ba6f5571ee1f68716cca982ccca0", "base_uqid": "a488b1e05a2b7b462c3fafb5b6c3536704c39959", "match_question_id": "a01842852de3fa9893856eb711efc3e34fdc1037", "match_uqid": "0e9fdf808ccf193218f64d62ab9b0c60f860de6b", }, - "4": { + { "link_type": "link", "base_question_id": "0e93c25d3a95604f40d3a64e2298093b4faed6f2", "base_uqid": None, "match_question_id": "66e92dd434dc3cccc5e14e3ad4ce710be8c7fb9d", "match_uqid": "ca2fca6b6b932b8fcbf9c92b66a9605b5a5b7f81", }, - "5": { + { "link_type": "confirm", "base_question_id": "8143708cc4a248c38504fbd783646557b23665df", "base_uqid": "a5d26ad90fec036826376e3be8425e9749c7160c", "match_question_id": None, "match_uqid": None, }, - } + ] return payload diff --git a/tests/dashboard/test_review_ui.py b/tests/dashboard/test_review_ui.py index 36754af..8962ff5 100644 --- a/tests/dashboard/test_review_ui.py +++ b/tests/dashboard/test_review_ui.py @@ -1,4 +1,3 @@ -import json import os import time @@ -150,9 +149,8 @@ def count_modal_rows(table: str): # Confirm there is a record in proposed_changes result = conn.execute(text("SELECT link_dict FROM proposed_changes")) link_dict = result.scalar() - link_dict = json.loads(link_dict) question_ids = [] - for link in link_dict.values(): + for link in link_dict: for qid in ["base_question_id", "match_question_id"]: question_ids.append(link[qid]) diff --git a/tests/dashboard/test_search_ui.py b/tests/dashboard/test_search_ui.py index f756b60..c9e75a1 100644 --- a/tests/dashboard/test_search_ui.py +++ b/tests/dashboard/test_search_ui.py @@ -1,4 +1,3 @@ -import json import os import time @@ -164,9 +163,8 @@ def count_modal_rows(table: str): # Confirm there is a record in proposed_changes result = conn.execute(text("SELECT link_dict FROM proposed_changes")) link_dict = result.scalar() - link_dict = json.loads(link_dict) question_ids = [] - for link in link_dict.values(): + for link in link_dict: for qid in ["base_question_id", "match_question_id"]: question_ids.append(link[qid]) diff --git a/tests/utils/test_dashboard_utils.py b/tests/utils/test_dashboard_utils.py index 1521a90..a56cc97 100644 --- a/tests/utils/test_dashboard_utils.py +++ b/tests/utils/test_dashboard_utils.py @@ -120,7 +120,7 @@ def get_ids(payload: dict): # In case 1, both uqids should be totally removed from the database with sql_utils.engine.connect() as conn: base_qid, base_uqid, match_qid, match_uqid = get_ids( - question_linker_payload["1"] + question_linker_payload[0] ) result = conn.execute( select(question_table).where(question_table.c["uqid"] == base_uqid) @@ -162,7 +162,7 @@ def get_ids(payload: dict): # In case 2, uqid for match_qid should remain the same, uqid for base_qid should change with sql_utils.engine.connect() as conn: base_qid, base_uqid, match_qid, match_uqid = get_ids( - question_linker_payload["2"] + question_linker_payload[1] ) result = conn.execute( select(question_table.c["uqid"]) @@ -200,7 +200,7 @@ def get_ids(payload: dict): # In case 3, base uqid should be kept the other should be removed with sql_utils.engine.connect() as conn: base_qid, base_uqid, match_qid, match_uqid = get_ids( - question_linker_payload["3"] + question_linker_payload[2] ) # Base question has base uqid result = conn.execute( @@ -254,7 +254,7 @@ def get_ids(payload: dict): # In case 4 match keeps uqid, base gets match uqid with sql_utils.engine.connect() as conn: base_qid, base_uqid, match_qid, match_uqid = get_ids( - question_linker_payload["4"] + question_linker_payload[3] ) # Base question has match uqid @@ -300,7 +300,7 @@ def get_ids(payload: dict): # In case 5 record should appear as confirmed in the changelog with sql_utils.engine.connect() as conn: base_qid, base_uqid, match_qid, match_uqid = get_ids( - question_linker_payload["5"] + question_linker_payload[4] ) result = conn.execute(