-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcache.py
More file actions
124 lines (112 loc) · 3.39 KB
/
cache.py
File metadata and controls
124 lines (112 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import json
from datetime import datetime, timezone
from typing import Optional, Tuple, Dict
# Backend selection: use DuckDB when the module imports cleanly; any failure
# while importing it switches this module to the JSON file cache.
try:
    import duckdb  # type: ignore
except Exception:
    _HAS_DUCKDB = False
else:
    _HAS_DUCKDB = True

# The cache path is read from CACHE_DB (DuckDB backend) or CACHE_JSON (JSON
# backend); each falls back to a relative default file name.
if _HAS_DUCKDB:
    CACHE_FILE = os.environ.get("CACHE_DB", "cache.duckdb")
else:
    CACHE_FILE = os.environ.get("CACHE_JSON", "cache.json")
# -------------------- DuckDB backend --------------------
if _HAS_DUCKDB:
# DDL executed each time a connection is opened: one row per cached URL,
# holding the time the entry was stored and its JSON payload.
SCHEMA = """
CREATE TABLE IF NOT EXISTS profiles (
url TEXT PRIMARY KEY,
retrieved_at TIMESTAMP,
data JSON
);
"""
def _connect():
    """Open the DuckDB cache file and make sure the ``profiles`` table exists.

    Returns:
        An open DuckDB connection. The caller is responsible for closing it.

    Raises:
        Exception: Whatever ``duckdb.connect`` or the schema statement raises.
    """
    con = duckdb.connect(CACHE_FILE)
    try:
        con.execute(SCHEMA)
    except Exception:
        # Do not leak the open connection when the schema setup fails.
        con.close()
        raise
    return con
def get(url: str, ttl_minutes: int = 360) -> Tuple[Optional[Dict], Optional[int], bool]:
    """Look up ``url`` in the DuckDB cache.

    Args:
        url: Cache key.
        ttl_minutes: Maximum age, in whole minutes, for an entry to be served.

    Returns:
        A ``(data, age_min, hit)`` tuple:

        * fresh entry: the stored dict with ``from_cache`` and
          ``cache_age_min`` added, its age in minutes, ``True``;
        * stale entry: ``None``, its age in minutes, ``False``;
        * missing entry or any error: ``None``, ``None``, ``False``.
    """
    try:
        con = _connect()
        try:
            row = con.execute(
                "SELECT data, retrieved_at FROM profiles WHERE url = ?", [url]
            ).fetchone()
        finally:
            # Close even when the query fails so the connection is not leaked.
            con.close()
        if not row:
            return None, None, False

        data_json, retrieved_at = row
        # Naive timestamps coming back from the table are interpreted as UTC.
        if retrieved_at.tzinfo is None:
            retrieved_at = retrieved_at.replace(tzinfo=timezone.utc)
        now = datetime.now(timezone.utc)
        age_min = int((now - retrieved_at).total_seconds() // 60)
        if age_min > ttl_minutes:
            return None, age_min, False

        # The driver may hand the JSON column back as text or as a dict.
        data = data_json if isinstance(data_json, dict) else json.loads(data_json)
        data["from_cache"] = True
        data["cache_age_min"] = age_min
        return data, age_min, True
    except Exception:
        # Best-effort cache: any failure is reported to the caller as a miss.
        return None, None, False
def set(url: str, data: Dict) -> None:
    """Store ``data`` for ``url`` in the DuckDB cache, replacing any old row.

    The write is best-effort: serialization or database errors are swallowed
    and the function returns ``None`` either way.

    Args:
        url: Cache key.
        data: JSON-serializable payload to cache.
    """
    try:
        payload = json.dumps(data)
        # The timestamp is produced here as naive UTC instead of using SQL
        # CURRENT_TIMESTAMP: get() interprets the stored naive value as UTC,
        # and the SQL value is time-zone aware, so its conversion into the
        # TIMESTAMP column can depend on the database session time zone.
        stored_at = datetime.now(timezone.utc).replace(tzinfo=None)
        con = _connect()
        try:
            con.execute(
                "INSERT OR REPLACE INTO profiles(url, retrieved_at, data) VALUES (?, ?, ?)",
                [url, stored_at, payload],
            )
        finally:
            # Close even when the insert fails so the connection is not leaked.
            con.close()
    except Exception:
        # Deliberately silent: a failed cache write must not break the caller.
        pass
def clear_cache() -> None:
    """Delete every row from the DuckDB cache and print the outcome.

    Errors are not raised; they are reported on standard output instead.
    """
    try:
        con = _connect()
        try:
            con.execute("DELETE FROM profiles")
        finally:
            # Close even when the delete fails so the connection is not leaked.
            con.close()
        print("✅ DuckDB cache cleared")
    except Exception as e:
        print(f"❌ Error clearing DuckDB cache: {e}")
# -------------------- JSON file backend --------------------
else:
def _read_json() -> Dict[str, Dict]:
    """Load the whole JSON cache file.

    Returns:
        The mapping stored in ``CACHE_FILE``, or an empty dict when the file
        is missing, unreadable, not valid JSON, or does not contain a JSON
        object at the top level.
    """
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except Exception:
        # Best-effort: a missing or corrupt cache behaves like an empty one.
        return {}
    # Callers index the result by URL, so anything but a dict (list, number,
    # null, ...) is treated as an empty cache.
    return loaded if isinstance(loaded, dict) else {}
def _write_json(db: Dict[str, Dict]) -> None:
    """Write the cache mapping to ``CACHE_FILE`` as JSON, best-effort.

    Serialization and I/O errors are swallowed; the function always returns
    ``None``.

    Args:
        db: Mapping of URL to cache entry to persist.
    """
    try:
        # Serialize before opening the file: opening with "w" truncates it,
        # so an unserializable value must fail here, while the existing cache
        # on disk is still intact.
        payload = json.dumps(db)
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception:
        # Deliberately silent: a failed cache write must not break the caller.
        pass
def get(url: str, ttl_minutes: int = 360) -> Tuple[Optional[Dict], Optional[int], bool]:
    """Look up ``url`` in the JSON file cache.

    Args:
        url: Cache key.
        ttl_minutes: Maximum age, in whole minutes, for an entry to be served.

    Returns:
        A ``(data, age_min, hit)`` tuple:

        * fresh entry: the stored dict with ``from_cache`` and
          ``cache_age_min`` added, its age in minutes, ``True``;
        * stale entry: ``None``, its age in minutes, ``False``;
        * missing or malformed entry: ``None``, ``None``, ``False``.
    """
    db = _read_json()
    item = db.get(url) if isinstance(db, dict) else None
    if not item or not isinstance(item, dict):
        return None, None, False
    try:
        retrieved_at = datetime.fromisoformat(item.get("retrieved_at"))
    except Exception:
        return None, None, False

    # Only naive timestamps are assumed to be UTC; an aware timestamp keeps
    # its own offset so the age is computed correctly.
    if retrieved_at.tzinfo is None:
        retrieved_at = retrieved_at.replace(tzinfo=timezone.utc)
    now = datetime.now(timezone.utc)
    age_min = int((now - retrieved_at).total_seconds() // 60)
    if age_min > ttl_minutes:
        return None, age_min, False

    data = item.get("data") or {}
    if not isinstance(data, dict):
        # A non-dict payload cannot carry the cache markers; report a miss
        # instead of raising.
        return None, None, False
    data["from_cache"] = True
    data["cache_age_min"] = age_min
    return data, age_min, True
def set(url: str, data: Dict) -> None:
    """Record ``data`` under ``url`` in the JSON file cache.

    The whole cache file is read, the entry for ``url`` is replaced with the
    payload and the current UTC time in ISO 8601 form, and the file is
    written back.
    """
    cache = _read_json()
    stamp = datetime.now(timezone.utc).isoformat()
    entry = {"retrieved_at": stamp, "data": data}
    cache[url] = entry
    _write_json(cache)
def clear_cache() -> None:
    """Reset the JSON cache file to an empty object and print the outcome.

    Errors are not raised; they are reported on standard output instead.
    """
    try:
        # Write the file directly so that a failure reaches the handler below
        # and is reported, rather than being swallowed by a best-effort helper.
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump({}, f)
        print("✅ JSON cache cleared")
    except Exception as e:
        print(f"❌ Error clearing JSON cache: {e}")