-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
93 lines (73 loc) · 3.31 KB
/
parser.py
File metadata and controls
93 lines (73 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import logging
from typing import Optional, Tuple

from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
# Column layout when fetching all tickers (includes a Company Name column).
COLUMNS_ALL = [
    'X', 'Filing Date', 'Trade Date', 'Ticker', 'Company Name',
    'Insider Name', 'Title', 'Trade Type', 'Price', 'Qty',
    'Owned', 'ΔOwn', 'Value', '1d', '1w', '1m', '6m',
]
# Columns whose cells also carry a hyperlink worth extracting.
LINK_COLUMNS_ALL = ['Filing Date', 'Ticker', 'Company Name', 'Insider Name']

# Ticker-specific pages omit the Company Name column; derive those layouts
# from the all-tickers ones so the two lists cannot drift apart.
COLUMNS_TICKER = [name for name in COLUMNS_ALL if name != 'Company Name']
LINK_COLUMNS_TICKER = [name for name in LINK_COLUMNS_ALL if name != 'Company Name']
class OpenInsiderParser:
    """Parses OpenInsider screener HTML into a list of transaction dicts.

    Each transaction dict maps column names (from ``COLUMNS_ALL`` or
    ``COLUMNS_TICKER``) to the cell's stripped text; for link-bearing
    columns an additional ``"<name>_link"`` key holds the absolute URL
    (or ``None`` when the cell has no anchor).
    """

    def parse(self, html: str, ticker: Optional[str] = None) -> Tuple[list, Optional[str]]:
        """Parse the HTML page and return transactions plus the Finviz URL.

        Args:
            html: Raw HTML string from the OpenInsider screener page.
            ticker: If provided, uses the ticker-specific column layout
                (no Company Name column). Pass None for the all-tickers layout.

        Returns:
            Tuple of (transactions: list[dict], finviz_url: str | None).
            ``finviz_url`` is None when the page has no Finviz link or no
            data table was found.
        """
        soup = BeautifulSoup(html, 'html.parser')

        # The Finviz link is only present on ticker-specific pages.
        finviz_url = None
        finviz_link = soup.find('a', href=lambda h: h and 'finviz.com/quote.ashx' in h)
        if finviz_link:
            finviz_url = finviz_link.get('href')
            logger.info("Found Finviz URL: %s", finviz_url)

        table = soup.find('table', {'class': 'tinytable'})
        if not table:
            logger.warning("No 'tinytable' found in HTML")
            return [], finviz_url

        column_names = COLUMNS_TICKER if ticker else COLUMNS_ALL
        link_columns = LINK_COLUMNS_TICKER if ticker else LINK_COLUMNS_ALL

        tbody = table.find('tbody') or table
        data = []
        for row in tbody.find_all('tr'):
            cols = row.find_all('td')
            # Header/separator rows have fewer than two <td> cells; skip them.
            # (len(cols) < 2 already covers the empty-list case.)
            if len(cols) < 2:
                continue
            row_data = self._extract_row(cols, column_names, link_columns)
            # Keep only rows that carry at least a ticker or an insider name.
            if row_data.get('Ticker') or row_data.get('Insider Name'):
                data.append(row_data)

        logger.info("Parsed %d transactions", len(data))
        return data, finviz_url

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _extract_row(self, cols: list, column_names: list, link_columns: list) -> dict:
        """Extract a single table row into a dict.

        Cells beyond ``len(column_names)`` are ignored; link columns get an
        extra ``"<name>_link"`` entry with an absolute URL or ``None``.
        """
        row_data = {}
        for i, col in enumerate(cols):
            if i >= len(column_names):
                break
            col_name = column_names[i]
            row_data[col_name] = col.get_text(strip=True)
            if col_name in link_columns:
                link = col.find('a')
                href = link.get('href') if link else None
                # Resolve site-relative hrefs to absolute OpenInsider URLs.
                if href and href.startswith('/'):
                    href = f"http://openinsider.com{href}"
                row_data[f"{col_name}_link"] = href
        return row_data