-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscraper_utils.py
More file actions
123 lines (119 loc) · 2.79 KB
/
scraper_utils.py
File metadata and controls
123 lines (119 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Custom request headers used to simulate a browser: the User-Agent string
# identifies the client as Chrome 58 on 64-bit Windows 10.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.36"
    ),
}
# Root of the main Justia law site (no trailing slash).
JUSTIA_BASE_URL = "https://law.justia.com"
# The codes index sits under the main site; the trailing slash is part of
# the value.
CODES_BASE_URL = JUSTIA_BASE_URL + "/codes/"
# Root of the separate regulations site (no trailing slash).
REGULATIONS_BASE_URL = "https://regulations.justia.com"

# NOTE(review): presumably the JSON Lines file that failed items are written
# to -- confirm against callers. The name looks like a typo for
# FAILED_FILEPATH; it is kept as-is because other modules may import it.
FAILED_FAILPATH = "failed.jsonl"

# U+203A (single right-pointing angle quotation mark); same value as
# chr(8250).
CUSTOM_SEP = "\u203a"
# Two-letter jurisdiction abbreviation -> lowercase, hyphen-separated slug.
# NOTE(review): presumably the slug is the path segment appended to the
# Justia base URLs -- confirm against callers.
JUR_URL_MAP = {
    # The 50 states, in alphabetical order of the state name.
    "AL": "alabama",
    "AK": "alaska",
    "AZ": "arizona",
    "AR": "arkansas",
    "CA": "california",
    "CO": "colorado",
    "CT": "connecticut",
    "DE": "delaware",
    "FL": "florida",
    "GA": "georgia",
    "HI": "hawaii",
    "ID": "idaho",
    "IL": "illinois",
    "IN": "indiana",
    "IA": "iowa",
    "KS": "kansas",
    "KY": "kentucky",
    "LA": "louisiana",
    "ME": "maine",
    "MD": "maryland",
    "MA": "massachusetts",
    "MI": "michigan",
    "MN": "minnesota",
    "MS": "mississippi",
    "MO": "missouri",
    "MT": "montana",
    "NE": "nebraska",
    "NV": "nevada",
    "NH": "new-hampshire",
    "NJ": "new-jersey",
    "NM": "new-mexico",
    "NY": "new-york",
    "NC": "north-carolina",
    "ND": "north-dakota",
    "OH": "ohio",
    "OK": "oklahoma",
    "OR": "oregon",
    "PA": "pennsylvania",
    "RI": "rhode-island",
    "SC": "south-carolina",
    "SD": "south-dakota",
    "TN": "tennessee",
    "TX": "texas",
    "UT": "utah",
    "VT": "vermont",
    "VA": "virginia",
    "WA": "washington",
    "WV": "west-virginia",
    "WI": "wisconsin",
    "WY": "wyoming",
    # Non-state jurisdictions.
    "DC": "district-of-columbia",
    "PR": "puerto-rico",
    "VI": "virgin-islands",
}
# Two-letter jurisdiction abbreviation -> full display name.
JUR_NAME_MAP = {
    # The 50 states, in alphabetical order of the state name.
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
    # Non-state jurisdictions.
    "DC": "District of Columbia",
    "PR": "Puerto Rico",
    "VI": "U.S. Virgin Islands",
}