From b253de0811893a10fbe3ed672cfe53c8c545916a Mon Sep 17 00:00:00 2001 From: hannah cushman garland Date: Fri, 31 Mar 2023 13:58:28 -0500 Subject: [PATCH 1/2] Update precinct result scraper --- README.md | 2 -- chi_elections/precincts.py | 45 ++++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 00429de..b55689a 100644 --- a/README.md +++ b/README.md @@ -71,8 +71,6 @@ client = SummaryClient(url='http://www.chicagoelections.com/results/ap/summary.t Precinct Results ---------------- -**N.b., The format of precinct results has changed and needs to be updated.** - After election night, precinct-level results are published to https://chicagoelections.com/en/election-results.html. The results are HTML files, so we have to scrape the results from HTML tables. ### Results client diff --git a/chi_elections/precincts.py b/chi_elections/precincts.py index 7767302..7f3109c 100644 --- a/chi_elections/precincts.py +++ b/chi_elections/precincts.py @@ -8,7 +8,7 @@ import requests class Election(object): - ELECTION_URL = 'https://chicagoelections.com/en/election-results.asp' + ELECTION_URL = 'https://chicagoelections.gov/en/election-results.asp' def __init__(self, elec_code, name, session): self.elec_code = elec_code @@ -28,7 +28,7 @@ def turnout(self): def races(self): response = self.session.get(self.url, params={'election': self.elec_code}) - page = lxml.html.fromstring(response.text) + page = lxml.html.fromstring(response.content.decode('utf-8')) option_els = page.xpath( "//select[@name='race']/option") @@ -48,7 +48,7 @@ def races(self): return races class Race(object): - RESULTS_URL = 'https://chicagoelections.com/en/data-export.asp' + RESULTS_URL = 'https://chicagoelections.gov/en/election-results-specifics.asp' def __init__(self, elec_code, name=None, number=None, session=None): self.elec_code = elec_code @@ -72,33 +72,36 @@ def precincts(self): results_d = {} response = self.session.get(self.RESULTS_URL, - params = {'election': self.elec_code, - 'race': self.number}) - page = lxml.html.fromstring(response.text) + data={'election': self.elec_code, + 'race': self.number}) + page = lxml.html.fromstring(response.content.decode('utf-8')) tables = page.xpath('//table') - - total = tables.pop(0) - header_row = total.xpath('./tr')[0] - keys = ['precinct'] - for cell in header_row.xpath('./td//text()'): - keys.append(cell.strip().replace("''", "'")) + tables.pop(0) # Discard total for table in tables: - rows = table.xpath('./tr') - ward = rows.pop(0).xpath('./td//text()')[0] + title, header = table.xpath('./thead/tr') + ward, = title.xpath('./th/b/text()') ward_num = int(ward.split()[-1]) - rows.pop(0) # ignore repeated header row - for row in rows: + keys = [field.strip() for field in header.xpath('./td/b/text()')] + + for row in table.xpath('./tr'): votes = {} + for key, cell in zip(keys, row.xpath('./td//text()')): - if cell == 'Total': # ignore ward subtotals + if cell == 'Total': # Ignore ward subtotals break - if '%' in key or key == 'Votes': # ignore derived vars + + if key == 'Precinct': + precinct = int(cell) continue + + elif key == '%': # Ignore derived vars + continue + votes[key] = int(cell.strip().replace(',', '')) + else: - precinct = votes.pop('precinct') results_d[(ward_num, precinct)] = votes return results_d @@ -132,7 +135,7 @@ def total(self): def elections(session=None): '''List all available elections''' - election_url = 'https://chicagoelections.com/en/election-results.html' + election_url = 'https://chicagoelections.gov/en/election-results.html' if session is None: session = requests.Session() @@ -140,7 +143,7 @@ def elections(session=None): session = session response = session.get(election_url) - page = lxml.html.fromstring(response.text) + page = lxml.html.fromstring(response.content.decode('utf-8')) election_links = page.xpath("//a[starts-with(@href, 'election-results.asp?election=')]") From a9371d9bb554f7418296ecca40b45ee6342801e7 Mon Sep 17 00:00:00 2001 From: hannah cushman garland Date: Fri, 31 Mar 2023 14:46:28 -0500 Subject: [PATCH 2/2] Undo decode --- chi_elections/precincts.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/chi_elections/precincts.py b/chi_elections/precincts.py index 7f3109c..5f10489 100644 --- a/chi_elections/precincts.py +++ b/chi_elections/precincts.py @@ -28,7 +28,7 @@ def turnout(self): def races(self): response = self.session.get(self.url, params={'election': self.elec_code}) - page = lxml.html.fromstring(response.content.decode('utf-8')) + page = lxml.html.fromstring(response.text) option_els = page.xpath( "//select[@name='race']/option") @@ -74,7 +74,7 @@ def precincts(self): response = self.session.get(self.RESULTS_URL, data={'election': self.elec_code, 'race': self.number}) - page = lxml.html.fromstring(response.content.decode('utf-8')) + page = lxml.html.fromstring(response.text) tables = page.xpath('//table') tables.pop(0) # Discard total @@ -130,7 +130,6 @@ def total(self): return dict(results_d) - def elections(session=None): '''List all available elections''' @@ -143,7 +142,7 @@ def elections(session=None): session = session response = session.get(election_url) - page = lxml.html.fromstring(response.content.decode('utf-8')) + page = lxml.html.fromstring(response.text) election_links = page.xpath("//a[starts-with(@href, 'election-results.asp?election=')]")