Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,170 changes: 1,170 additions & 0 deletions projects/acf/acf_parse-docx-to-xml_CA.py

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions projects/acf/acf_parse_config.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
{
"input_doc": "C:/ACF/FINAL APPROVED STATE RECORDS/Yurok Tribe/Yurok Tribe_Copy.docx",
"out_dir": "C:/ACF/FINAL APPROVED STATE RECORDS/Yurok Tribe",
"input_doc": "C:/ACF/FINAL APPROVED STATE RECORDS/California/California_Copy.docx",
"out_dir": "C:/ACF/FINAL APPROVED STATE RECORDS/California",
"xsd_file": "schema_final.xsd",
"xsl_file": "statetemplatev5.xsl",
"xsl_file": "statetemplatev5_california.xsl",
"category": "false",
"titleName": "Title",
"subtitleName": "",
"articleName": "Chapter",
"partName": [],
"subPartName": "",
"titleContent": "false",
"state": "Yurok Tribe",
"state_code_pattern": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(YTC\\s[\\d\\.]+)\\)?$",
"statute_pattern": "^(YTC\\s[\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"titleContent": "true",
"titleName": "Code",
"subtitleName": ["Division", "Title", "Part"],
"articleName": ["Part", "Division", "Title"],
"partName": ["Chapter", "Part"],
"subPartName": "Chapter",
"state": "California",
"state_code_pattern": "(.+?)\\s?\\u2013\\s?(.*?)\\s+\\(?(CA\\s(?:Family Code|Government Code|Health and Safety Code|Welfare and Institutions Code|Penal Code|Civil Code)\\s\\u00A7\\s[-\\d\\.\\s]+)\\)?$",
"statute_pattern": "^(CA\\s(?:Family Code|Government Code|Health and Safety Code|Welfare and Institutions Code|Penal Code|Civil Code)\\s\\u00A7\\s[-\\d\\.\\s]+)\\s+[\\u2013\\u2014]\\s+(.*)$",
"patterns": {
"base": "(.+)\\s?\\u2013\\s?(.*)\\s+",
"Alabama": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(AL\\sCode\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
Expand Down
3 changes: 3 additions & 0 deletions projects/acf/docx-fixes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ DOCX Fixes
- Replace: --
- With: –

- Replace: --
- With: —

4) Ensure consistent text with statute information
- Use "Definitions related to" instead of "Definitions for"

Expand Down
3 changes: 3 additions & 0 deletions projects/acf/schema_final.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
<xs:element ref="number"/>
<xs:element ref="name"/>
<xs:element ref="source"/>
<xs:element minOccurs="0" ref="altName"/>
<xs:element minOccurs="0" ref="part"/>
<xs:element ref="associatedFederalRecords"/>
<xs:choice minOccurs="1" maxOccurs="1">
Expand All @@ -108,6 +109,7 @@
<xs:sequence>
<xs:element ref="domain"/>
<xs:element minOccurs="0" ref="subtitle"/>
<xs:element minOccurs="0" ref="part"/>
<xs:element ref="associatedFederalRecords"/>
<xs:choice minOccurs="1" maxOccurs="1">
<xs:sequence>
Expand All @@ -128,6 +130,7 @@
<xs:element ref="number"/>
<xs:element ref="name"/>
<xs:element ref="source"/>
<xs:element minOccurs="0" ref="altName"/>
</xs:sequence>
</xs:complexType>
</xs:element>
Expand Down
113 changes: 113 additions & 0 deletions projects/acf/state_configs/AR_acf_parse_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"input_doc": "C:/ACF/FINAL APPROVED STATE RECORDS/Arkansas/Arkansas_Copy.docx",
"out_dir": "C:/ACF/FINAL APPROVED STATE RECORDS/Arkansas",
"xsd_file": "schema_final.xsd",
"xsl_file": "statetemplatev5.xsl",
"category": "false",
"titleName": "Title",
"subtitleName": "Subtitle",
"articleName": "Chapter",
"partName": ["Subchapter"],
"subPartName": "",
"titleContent": "false",
"state": "Arkansas",
"state_code_pattern": "(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(Ark\\.\\sCode\\sAnn\\.\\s\\u00A7\\s[-\\d]+)\\)?$",
"statute_pattern": "^(Ark\\.\\sCode\\sAnn\\.\\s\\u00A7\\s[-\\d]+)\\s+[\u2013\u2014]\\s+(.*)$",
"patterns": {
"base": "(.+)\\s?\\u2013\\s?(.*)\\s+",
"Alabama": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(AL\\sCode\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
"Arizona": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(AZ\\sRev\\sStat\\s\\u00A7\\s\\d+-[-\\w\\.]+)\\)?$",
"Colorado": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(CO\\sCode\\s\\u00A7\\s\\[-\\d\\.]+)\\)?$",
"Connecticut": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(CT\\sGen\\sStat\\s\\u00A7\\s[-\\w]+)\\)?$",
"Coquille": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(C\\.I\\.T\\.C\\sStat\\s\\u00A7\\s[\\d\\.]+)\\)?$",
"Delaware": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(\\d+\\sDE\\sCode\\s\\u00A7\\s[\\w\\d]+)\\)?$",
"Flandreau": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(Flandreau\\sSantee\\sSioux\\sTribal\\sCode\\s\\u00A7\\s[-\\d]+)\\)?$",
"Florida": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(FLA\\.\\sSTAT\\.\\s\\u00A7\\s\\d+\\.\\d+)\\)?$",
"Georgia": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(GA\\sCode\\s\\u00A7\\s[-\\d\\.]+)\\)?$",
"Hawaii": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(HI\\sRev\\sStat\\s\\u00A7[-\\w\\.]+)\\)?$",
"Idaho": "(.+)\\s?\\u2013\\s?(.*)\\s+\\(?(I\\.C\\.\\sStat\\s\\u00A7\\s[-\\w]+)\\)?$",
"Illinois": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(\\d+\\sILCS\\s[-\\d\\.\\/]+)\\)?$",
"Indiana": "(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(IN Code\\s\\u00A7\\s[-\\.\\w]+)\\)?$",
"Iowa": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(IA\\sCode\\s\\u00A7\\s[\\w\\.]+)\\)?$",
"Kansas": "(.+)\\s?\\u2013\\s?(.*)\\s+\\(?(K\\.S\\.\\sStat\\s\\u00A7\\s[-\\d]+)\\)?$",
"Kentucky": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(K\\.Y\\.\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
"Louisiana": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(LA\\sRev\\sStat\\s\\u00A7\\s[:\\d\\.]+)\\)?$",
"Maine": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(M\\.R\\.S\\.\\s[-\\w]+\\s\\u00A7\\s[-\\w]+)\\)?$",
"Maryland": "(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(MD Code,?\\s(?:Family Law|Local Government|General Provisions|Health\\s*[-\\u2013]\\s*General|Human Services),?\\s\u00A7\\s\\d+(?:\\.\\d+)?-[-–\\.\\w]*)\\)?$",
"Massachusetts": "(.+)\\s?\\u2013\\s?(.*)\\s+\\(?(MA Gen L.+\\d+[A-Za-z]*)\\s*\\)?$",
"Michigan": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(MI\\sComp\\sLaws\\s\\u00A7\\s[\\w\\.]+)\\)?$",
"Minnesota": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(MN\\sStat\\s\\u00A7\\s[\\w\\.]+)\\)?$",
"Mississippi": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(MS\\sCode\\s\\u00A7\\s?[-\\w\\.]+)\\)?$",
"Muscogee": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(M\\.C\\.N\\.\\sStat\\s\\u00A7\\s?[-\\d]+)\\)?$",
"Navajo Nation": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(\\d+\\sN\\.N\\.C\\.\\s\\u00A7\\s\\d+),\\sp\\.\\d+\\)?$",
"New Hampshire": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(N\\.H\\.\\sStat\\s\\u00A7\\s[-\\w:]+)\\)?$",
"New Jersey": "(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(NJ Rev Stat \\u00A7 \\d+:\\d+[-\\.\\w]*)\\)?$",
"New York": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(NY\\s.+\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
"Nevada": "(.+)\\s?\\u2013\\s?(.*)\\s+\\(?(NRS\\s[A-Za-z0-9\\.]+)\\)?$",
"North Carolina": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(NC\\sGen\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
"North Dakota": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(N\\.D\\.\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
"Ohio": "(.+?)\\s?\\u2013\\s?(.*?)\\s+\\(?(Ohio Rev Code\\s\\u00A7\\s+\\d+(?:\\.\\d*)?)\\)?$",
"Oklahoma": "(.+?)\\s?\\u2013\\s?(.*?)\\s+\\(?(\\d+[A-Za-z]?\\sOK STAT \\u00A7\\s[\\d-]+([A-Za-z0-9\\-\\.]+)?)\\)?$",
"Oregon": "(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(ORS\\s+[A-Za-z0-9\\.]+)\\)?$",
"Pennsylvania": "(.+?)\\s?\\u2013\\s?(.*?)\\s+\\(?(\\d+\\sPA\\sCons\\sStat\\s\\u00A7\\s[\\.\\w]+)\\)?$",
"Potawatomi": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(P\\.N\\.\\sStat\\s\\u00A7\\s[-\\d]+)\\)?$",
"Rhode Island": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(R\\.I\\.\\sStat\\s\\u00A7\\s[-\\d\\.]+)\\)?$",
"South Carolina": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\((SC\\sCode\\s\\u00A7\\s[-\\d]+)\\)\\.?$",
"Tennessee": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(TN Code\\s\\u00A7\\s[-\\.\\w]+)\\)?$",
"Texas": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(TX\\s(?:Hum Res Code|Govt Code|Health & Safety Code|Fam Code)\\s\\u00A7\\s[\\d\\.]+)\\)?$",
"Vermont": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(\\d+[A-Z]?\\sV\\.S\\.A\\.\\s\\u00A7\\s[-\\w]+)\\)?$",
"Virginia": "^(VA Code\\s\\u00A7\\s+\\d+(?:\\.\\d+)?-\\d+(?:\\.\\d+)?(?::\\d+)?)\\s+[\\u2013\\u2014]\\s+(.*)$",
"West Virginia": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(W\\.V\\.\\sStat\\s\\u00A7\\s[-\\w]+)\\)?$",
"Wisconsin": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(WI\\sStat\\s\\u00A7\\s[\\d\\.]+)\\)?$",
"Wyoming": "^(.+?)\\s?\\u2013\\s?(.*?)\\s?\\(?(WY\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\)?$",
"Yurok Tribe": "(.+?)\\s?\\u2013\\s?(.*?)\\s+\\(?(YTC\\s[\\.\\d]+)\\)?$"
},
"state_statutes": {
"Alabama": "^(AL\\sCode\\s\\u00A7\\s[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Arizona": "^(AZ\\sRev\\sStat\\s\\u00A7\\s\\d+-[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Colorado": "^(CO\\sCode\\s\\u00A7\\s\\[-\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Connecticut": "^(CT\\sGen\\sStat\\s\\u00A7\\s[-\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Coquille": "^(C\\.I\\.T\\.C\\sStat\\s\\u00A7\\s[\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Delaware": "^(\\d+\\sDE\\sCode\\s\\u00A7\\s\\[\\w\\d]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Flandreau": "^(Flandreau\\sSantee\\sSioux\\sTribal\\sCode\\s\\u00A7\\s[-\\d]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Florida": "^(FLA\\.\\sSTAT\\.\\s\\u00A7\\s\\d+\\.\\d+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Georgia": "^(GA\\sCode\\s\\u00A7\\s[-\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Hawaii": "^(HI\\sRev\\sStat\\s\\u00A7[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Idaho": "^(I\\.C\\.\\sStat\\s\\u00A7\\s[-\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Illinois": "^(\\d+\\sILCS\\s[-\\d\\.\\/]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Indiana": "^(IN Code\\s\\u00A7\\s[-\\.\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Iowa": "^(IA\\sCode\\s\\u00A7\\s[\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Kansas": "^(K\\.S\\.\\sStat\\s\\u00A7\\s[-\\d]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Kentucky": "^(K\\.Y\\.\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Louisiana": "^(LA\\sRev\\sStat\\s\\u00A7\\s[:\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Maine": "^(M\\.R\\.S\\.\\s[-\\w]+\\s\\u00A7\\s[-\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Maryland": "^(MD Code,?\\s(?:Family Law|Local Government|General Provisions|Health\\s*[-\\u2013]\\s*General|Human Services),?\\s\u00A7\\s\\d+(?:\\.\\d+)?-[-\\.\\w]*)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Massachusetts": "^(MA Gen L.+\\d+[A-Za-z]*)\\s?[\\u2013\\u2014]\\s?(.*)",
"Michigan": "^(MI\\sComp\\sLaws\\s\\u00A7\\s[\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Minnesota": "^(MN\\sStat\\s\\u00A7\\s[\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Mississippi": "^(MS\\sCode\\s\\u00A7\\s?[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Muscogee": "^(M\\.C\\.N\\.\\sStat\\s\\u00A7\\s?[-\\d]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Navajo Nation": "^(\\d+\\sN\\.N\\.C\\.\\s\\u00A7\\s\\d+),\\sp\\.\\d+\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Nevada": "^(NRS\\s[A-Za-z0-9\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"New Hampshire": "^(N\\.H\\.\\sStat\\s\\u00A7\\s[-\\w:]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"New Jersey": "^(NJ Rev Stat \\u00A7 \\d+:\\d+[-\\.\\w]*)\\s+[\u2013\u2014]\\s+(.*)$",
"New York": "^(NY\\s.+\\s\\u00A7\\s[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"North Carolina": "^(NC\\sGen\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"North Dakota": "^(N\\.D\\.\\sStat\\s\\u00A7\\s[-\\w\\.\\s\\(\\)]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Oklahoma": "^(\\d+[A-Za-z]?\\sOK STAT \\u00A7\\s[\\d-]+[A-Za-z0-9\\-\\.]+)\\s?[\\u2013\\u2014]\\s?(.*)",
"Ohio": "^(Ohio Rev Code\\s\\u00A7\\s+\\d+(?:\\.\\d*-)?)\\s+[\u2013\u2014]\\s+(.*)",
"Oregon": "^(ORS\\s+[A-Za-z0-9\\.]+)\\s+[\\u2013\\u2014]\\s+(.*)$",
"Pennsylvania": "^(\\d+\\sPA\\sCons\\sStat\\s\\u00A7\\s[\\.\\w]+)\\s+[\\u2013\\u2014]\\s+(.*)$",
"Potawatomi": "^(P\\.N\\.\\sStat\\s\\u00A7\\s[-\\d]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Rhode Island": "^(R\\.I\\.\\sStat\\s\\u00A7\\s[-\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"South Carolina": "^(SC\\sCode\\s\\u00A7\\s[-\\d]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Tennessee": "^(TN Code\\s\\u00A7\\s[-\\.\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Texas": "^(TX\\s(?:Hum Res Code|Govt Code|Health & Safety Code|Fam Code)\\s\u00A7\\s[\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Vermont": "^(\\d+[A-Z]?\\sV\\.S\\.A\\.\\s\\u00A7\\s[-\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Virginia": "(.+?)\\s?\\u2013\\s?(.*?)\\s+\\(?(VA Code\\s\\u00A7\\s+\\d+(?:\\.\\d+)?-\\d+(?:\\.\\d+)?(?::\\d+)?)\\)?$",
"West Virginia": "^(W\\.V\\.\\sStat\\s\\u00A7\\s[-\\w]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Wyoming": "^(WY\\sStat\\s\\u00A7\\s[-\\w\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Wisconsin": "^(WI\\sStat\\s\\u00A7\\s[\\d\\.]+)\\s+[-\\u2013\\u2014]\\s+(.*)$",
"Yurok Tribe": "^(YTC\\s[\\.\\d]+)\\s+[\\u2013\\u2014]\\s+(.*)$"
}
}
Loading