-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbootstrap_sp500_models.py
More file actions
96 lines (75 loc) · 2.92 KB
/
bootstrap_sp500_models.py
File metadata and controls
96 lines (75 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3
"""
One-time bootstrap for S&P 500:
- Ensures each ticker has price history (limited window) in SQLite
- Builds features in-memory (no feature_store files)
- Trains/updates one OLS model per symbol
This is meant to be run manually (it can take a while the first time).
Daily scheduled runs should use `python3 main.py` (fast daily job).
"""
import os
import sys
import sqlite3
import time
import pandas as pd
from ingest_prices import PriceIngestor
from features import FeatureEngineer
from train import ModelManager
def _warn(message):
    """Report a non-fatal problem on stderr so the bootstrap can keep going."""
    print(f"[bootstrap] {message}", file=sys.stderr)


def _load_tickers(csv_path, limit=None):
    """Return the cleaned, upper-cased ticker list from the universe CSV.

    Args:
        csv_path: Path to a CSV file that has a ``ticker`` column.
        limit: Optional maximum number of tickers to keep; ``None`` or ``0``
            keeps all of them.

    Returns:
        A list of non-empty ticker strings in file order.
    """
    # Read everything as text and disable NA parsing: otherwise a blank cell
    # becomes NaN and str(NaN).upper() turns into the bogus ticker "NAN".
    universe = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
    cleaned = (str(raw).strip().upper() for raw in universe["ticker"].tolist())
    tickers = [ticker for ticker in cleaned if ticker]
    return tickers[:limit] if limit else tickers


def _throttle_seconds(key_count, assumed_rpm=5):
    """Compute the per-symbol sleep, clamped to the range [0.2, 2.0] seconds.

    ``assumed_rpm`` is presumably the request budget per minute for one API
    key (TODO confirm against the provider's limits). With no keys the
    fallback delay of 0.6 seconds is used.
    """
    overall_rps = (key_count * assumed_rpm) / 60.0 if key_count else 0.0
    sleep_s = 1.0 / overall_rps if overall_rps > 0 else 0.6
    return max(0.2, min(2.0, sleep_s))


def _resolve_price_source(ing, key_count):
    """Return the normalized price source name, resolving ``"auto"``.

    ``"auto"`` becomes ``"twelvedata"`` when at least one key is configured
    and ``"stooq"`` otherwise. A missing or empty ``price_source`` attribute
    defaults to ``"stooq"``.
    """
    src = str(getattr(ing, "price_source", "stooq") or "stooq").strip().lower()
    if src == "auto":
        src = "twelvedata" if key_count else "stooq"
    return src


def _fetch_missing_prices(ing, sym, src, latest_market_date):
    """Fetch prices for ``sym`` if its stored history is missing or behind.

    Returns the fetched DataFrame, or ``None`` when nothing needs fetching or
    the fetch failed (failures are reported on stderr, not raised).
    """
    try:
        last = ing.get_latest_date_for_symbol(sym)
    except Exception as exc:
        # Treat an unreadable latest date like "no history" (full fetch below).
        _warn(f"{sym}: could not read latest stored date ({exc!r})")
        last = None

    use_twelvedata = src == "twelvedata"
    try:
        if last is None:
            # No stored history: request the provider's default window.
            if use_twelvedata:
                return ing.fetch_twelvedata_daily(sym)
            return ing.fetch_stooq_data(sym)
        if latest_market_date and last != latest_market_date:
            # History exists but is behind: a short recent window is enough.
            if use_twelvedata:
                return ing.fetch_twelvedata_daily(sym, outputsize=10)
            return ing.fetch_stooq_latest(sym)
    except Exception as exc:
        _warn(f"{sym}: price fetch from {src} failed ({exc!r})")
    return None


def _store_prices(ing, df_prices):
    """Upsert ``df_prices`` into the ``prices`` table of the ingestor's DB."""
    conn = sqlite3.connect(ing.db_path)
    try:
        # The helper takes a table-like object exposing ``.name``; a throwaway
        # class stands in for it here.
        ing._sqlite_upsert(
            type("Table", (), {"name": "prices"}),
            conn,
            df_prices.columns.tolist(),
            df_prices.values.tolist(),
        )
        # Closing a sqlite3 connection does not commit pending changes, so
        # commit explicitly. This is a no-op if the helper already committed.
        conn.commit()
    finally:
        conn.close()


def _train_if_stale(fe, mm, sym):
    """Train the OLS model for ``sym`` when the model manager asks for it.

    Failures are reported on stderr and otherwise ignored so one bad symbol
    does not stop the whole bootstrap.
    """
    try:
        if mm._should_retrain(sym):
            df_feat = fe.generate(sym)
            if df_feat is not None and not df_feat.empty:
                mm.train_ols(sym, features_df=df_feat)
    except Exception as exc:
        _warn(f"{sym}: training skipped ({exc!r})")


def main():
    """Bootstrap price history and per-symbol OLS models for the universe.

    An optional first command-line argument limits how many tickers from
    ``./universe/sp500.csv`` are processed (``0`` or no argument means all).

    Raises:
        ValueError: If the limit argument is not an integer or is negative.
    """
    config_path = "config.yaml"
    limit = int(sys.argv[1]) if len(sys.argv) > 1 else None
    if limit is not None and limit < 0:
        # A negative slice bound would silently drop tickers from the end.
        raise ValueError(f"limit must be a non-negative integer, got {limit}")

    ing = PriceIngestor(config_path)
    fe = FeatureEngineer(config_path)
    mm = ModelManager(config_path)

    tickers = _load_tickers("./universe/sp500.csv", limit)

    # Conservative throttle similar to daily job.
    key_count = len(ing.twelvedata_keys.keys())
    sleep_s = _throttle_seconds(key_count)
    # The source does not change per symbol, so resolve it once up front.
    src = _resolve_price_source(ing, key_count)

    latest_market_date = ing.get_latest_market_date()
    total = len(tickers)

    for idx, sym in enumerate(tickers, 1):
        df_prices = _fetch_missing_prices(ing, sym, src, latest_market_date)
        if df_prices is not None and not df_prices.empty:
            _store_prices(ing, df_prices)

        # Train if missing/stale.
        _train_if_stale(fe, mm, sym)

        if idx % 25 == 0:
            print(f"Progress: {idx}/{total}")
        time.sleep(sleep_s)

    # Keep disk usage small.
    try:
        mm.prune_models_keep_latest_only()
    except Exception as exc:
        _warn(f"model pruning failed ({exc!r})")

    print("Bootstrap complete.")
if __name__ == "__main__":
    # Run the bootstrap only when executed as a script, not when imported.
    main()