From d90ed222cb322c771c20b6af6386605907d42d43 Mon Sep 17 00:00:00 2001
From: A Taylor <112668339+ATaylorAerospace@users.noreply.github.com>
Date: Sun, 29 Mar 2026 10:50:01 -0700
Subject: [PATCH] Add unit tests for citation extraction functionality

Add unit tests for the citation extraction tools. They verify that case,
statute, and regulation citations are extracted correctly, and that text
without citations yields an empty list.

The module is created as tests/test_tools/test_citations.py: pytest's
default discovery only collects test_*.py and *_test.py files, so a
module without the .py suffix would never be run.
---
 tests/test_tools/test_citations.py | 66 +++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 tests/test_tools/test_citations.py

diff --git a/tests/test_tools/test_citations.py b/tests/test_tools/test_citations.py
new file mode 100644
index 0000000..8603c64
--- /dev/null
+++ b/tests/test_tools/test_citations.py
@@ -0,0 +1,66 @@
+"""Tests for citation extraction tools."""
+
+from factor.tools.citations import extract_citations
+
+
+def test_extract_case_citation():
+    """Case citations (Party v. Party) should be extracted."""
+    text = "As established in Smith v. Jones, 123 F.Supp 456, the court ruled in favor."
+    results = extract_citations(text=text)
+    cases = [r for r in results if r["type"] == "case"]
+    assert len(cases) >= 1
+    assert cases[0]["plaintiff"].strip() == "Smith"
+    assert cases[0]["defendant"].strip().startswith("Jones")
+    assert cases[0]["is_synthetic"] is True
+    assert "warning" in cases[0]
+
+
+def test_extract_statute_citation():
+    """Statute citations (Title USC Section) should be extracted."""
+    text = "Pursuant to 42 USC 1983, the plaintiff has standing to bring this action."
+    results = extract_citations(text=text)
+    statutes = [r for r in results if r["type"] == "statute"]
+    assert len(statutes) >= 1
+    assert statutes[0]["title"] == "42"
+    assert statutes[0]["is_synthetic"] is True
+
+
+def test_extract_regulation_citation():
+    """Federal Register citations should be extracted."""
+    text = "The rule was published at 89 Fed. Reg. 12345 on March 1."
+    results = extract_citations(text=text)
+    regs = [r for r in results if r["type"] == "regulation"]
+    assert len(regs) >= 1
+    assert regs[0]["volume"] == "89"
+    assert regs[0]["page"] == "12345"
+    assert regs[0]["is_synthetic"] is True
+
+
+def test_extract_no_citations():
+    """Text without citations should return an empty list."""
+    results = extract_citations(text="This is plain text with no legal citations at all.")
+    assert results == []
+
+
+def test_extract_multiple_types():
+    """Text with mixed citation types should extract all of them."""
+    text = (
+        "In Adams v. Baker, 500 F.2d 100, the court cited 28 USC 1332. "
+        "The regulation at 75 Fed. Reg. 9999 was also referenced."
+    )
+    results = extract_citations(text=text)
+    types_found = {r["type"] for r in results}
+    assert "case" in types_found
+    assert "statute" in types_found
+    assert "regulation" in types_found
+
+
+def test_all_citations_marked_synthetic():
+    """Every extracted citation must carry is_synthetic and a warning."""
+    text = "Smith v. Jones, 123 F.Supp 456. See also 28 USC 1332."
+    results = extract_citations(text=text)
+    assert len(results) >= 2
+    for citation in results:
+        assert citation["is_synthetic"] is True
+        assert "warning" in citation
+        assert "synthetic" in citation["warning"].lower()