-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathglobalapi_api_client.py
More file actions
325 lines (267 loc) · 11.2 KB
/
globalapi_api_client.py
File metadata and controls
325 lines (267 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
"""
API client for CityCatalyst Global API
Base URL is configurable via the GLOBALAPI_BASE_URL environment variable.
"""
import os
import httpx
BASE_URL = os.getenv("GLOBALAPI_BASE_URL", "https://ccglobal.openearth.dev").rstrip("/")
def get_health() -> dict:
    """
    Query the CityCatalyst Global API health endpoint.

    Returns:
        dict: Parsed JSON payload describing service health.

    Raises:
        httpx.HTTPStatusError: If the endpoint responds with a 4xx/5xx status.
    """
    url = f"{BASE_URL}/health"
    resp = httpx.get(url, timeout=10.0)
    resp.raise_for_status()
    return resp.json()
def _fetch_city_emission(source: str, city: str, year: str, gpc_reference_number: str, gwp: str = "ar5") -> str | None:
    """
    Fetch the total CO2eq (100yr) value for one GPC reference number.

    The city identifier may contain spaces (e.g. "BR SER"), so it is
    URL-encoded before being placed in the request path.
    """
    from urllib.parse import quote

    url = (
        f"{BASE_URL}/api/v1/source/{source}/city/"
        f"{quote(city)}/{year}/{gpc_reference_number}"
    )
    resp = httpx.get(url, params={"gwp": gwp}, timeout=10.0)
    resp.raise_for_status()
    payload = resp.json()
    totals = payload.get("totals", {})
    return totals.get("emissions", {}).get("co2eq_100yr")
def get_city_emissions(source: str, city: str, year: str, gpc_reference_number: str, gwp: str = "ar5") -> str | None:
    """
    Fetch total CO2eq emissions for a single GPC scope from the Global API.

    Thin public wrapper around the single-scope helper.

    Args:
        source: Data source (e.g., "SEEG")
        city: City identifier (e.g., "BR SER")
        year: Year (e.g., "2022")
        gpc_reference_number: GPC reference number (e.g., "II.1.1")
        gwp: Global Warming Potential standard (default: "ar5")

    Returns:
        str | None: Total CO2eq emissions (100yr) value, or None when no data exists.
    """
    result = _fetch_city_emission(source, city, year, gpc_reference_number, gwp)
    return result
def get_city_emissions_all_scopes(
    source: str,
    city: str,
    year: str,
    gwp: str = "ar5",
    gpc_scopes: list[str] | None = None,
) -> dict:
    """
    Retrieve emissions across all GPC scopes for a given city and source.

    If any scope returns no data, it is marked as empty so the caller can surface that
    to the LLM. Per-scope failures never abort the whole sweep; they are recorded
    with status "error" instead.

    Args:
        source: Data source (e.g., "SEEG").
        city: City identifier (e.g., "BR SER").
        year: Year (e.g., "2022").
        gwp: Global Warming Potential standard (default: "ar5").
        gpc_scopes: Explicit scope list; when omitted, scopes are derived from
            the catalogue via get_gpc_reference_numbers_by_source().

    Returns:
        dict: {
            "gpc_scopes": [...],
            "emissions": [
                {"gpc_reference_number": "I.1.1", "co2eq_100yr": 123, "status": "ok"},
                {"gpc_reference_number": "II.1.1", "co2eq_100yr": None, "status": "empty", "message": "..."},
            ],
        }
    """

    def _record(scope: str, value=None, status: str = "ok", message: str | None = None) -> dict:
        # Build one result entry; "ok" entries deliberately carry no "message" key.
        entry = {"gpc_reference_number": scope, "co2eq_100yr": value, "status": status}
        if message is not None:
            entry["message"] = message
        return entry

    scopes = gpc_scopes or get_gpc_reference_numbers_by_source(source)
    emissions: list[dict] = []
    for scope in scopes:
        try:
            value = _fetch_city_emission(source, city, year, scope, gwp)
        except httpx.HTTPStatusError as exc:
            # A 404 means "no data for this scope" rather than a real failure.
            if exc.response is not None and exc.response.status_code == 404:
                emissions.append(
                    _record(scope, status="empty", message="No emissions data for this scope (404 from API).")
                )
            else:
                emissions.append(_record(scope, status="error", message=str(exc)))
        except Exception as exc:  # noqa: BLE001 — best-effort sweep: record and continue
            emissions.append(_record(scope, status="error", message=str(exc)))
        else:
            if value is None:
                emissions.append(
                    _record(scope, status="empty", message="No emissions data for this scope.")
                )
            else:
                emissions.append(_record(scope, value=value))
    return {"gpc_scopes": scopes, "emissions": emissions}
def get_city_area(locode: str) -> dict:
    """
    Fetch a city's boundary area by UN/LOCODE.

    Args:
        locode: Unique identifier for the city

    Returns:
        dict: City area in square kilometers
    """
    url = f"{BASE_URL}/api/v0/cityboundary/city/{locode}/area"
    resp = httpx.get(url, timeout=10.0)
    resp.raise_for_status()
    return resp.json()
def get_catalogue(format: str | None = None) -> dict | str:
    """
    Get the data catalogue from CityCatalyst Global API.

    Args:
        format: Optional format parameter (e.g., "csv"). When "csv", returns raw text.

    Returns:
        dict or str: Catalogue data with list of datasources, or CSV text if format="csv"

    Raises:
        httpx.HTTPStatusError: If the API responds with a 4xx/5xx status.
    """
    params = {"format": format} if format else {}
    response = httpx.get(f"{BASE_URL}/api/v0/catalogue", params=params, timeout=10.0)
    response.raise_for_status()
    if format and str(format).lower() == "csv":
        # Endpoint returns CSV when format=csv; surface the raw text instead of JSON parsing.
        return response.text
    return response.json()
def get_cities_by_country(country_code: str) -> dict:
    """
    List cities (locodes) belonging to a country.

    Args:
        country_code: ISO alpha-2 country code (e.g., "BR", "AR")

    Returns:
        dict: Response from the /api/v0/ccra/city/{country_code} endpoint.
    """
    url = f"{BASE_URL}/api/v0/ccra/city/{country_code}"
    resp = httpx.get(url, timeout=10.0)
    resp.raise_for_status()
    return resp.json()
def list_available_country_codes(prefer_iso2: bool = True) -> list[str]:
    """
    Derive the set of country codes present in catalogue datasources.

    Args:
        prefer_iso2: When True, return only 2-letter codes; otherwise include any codes seen.

    Returns:
        Sorted list of unique, upper-cased country codes.
    """
    catalogue = get_catalogue()
    entries = catalogue.get("datasources", []) if isinstance(catalogue, dict) else []
    found: set[str] = set()
    for entry in entries:
        location = str(entry.get("geographical_location", "")).strip()
        if not location:
            continue
        # Some entries carry mixed or longer location strings; optionally keep only ISO2-length codes.
        if not prefer_iso2 or len(location) == 2:
            found.add(location.upper())
    return sorted(found)
def get_gpc_reference_numbers_by_source(source: str) -> list:
    """
    Collect all GPC reference numbers covered by a particular source.

    A datasource matches when the source string appears (case-insensitively)
    in its name, publisher id, or API endpoint, or when the endpoint contains
    a literal "/source/{source}/" path segment.

    Args:
        source: Data source name (e.g., "SEEG" or "SEEGv2023")

    Returns:
        list: Unique GPC reference numbers for the source, sorted alphabetically.
    """
    catalogue = get_catalogue()
    refs: set = set()
    if not (isinstance(catalogue, dict) and "datasources" in catalogue):
        return sorted(refs)
    needle = source.upper()
    for entry in catalogue["datasources"]:
        name = entry.get("datasource_name", "")
        publisher = entry.get("publisher_id", "")
        endpoint = entry.get("api_endpoint", "")
        matched = (
            needle in name.upper()
            or needle in publisher.upper()
            or needle in endpoint.upper()
            or f"/source/{source}/" in endpoint
            or f"/source/{needle}/" in endpoint
        )
        if matched and entry.get("gpc_reference_number"):
            refs.add(entry["gpc_reference_number"])
    return sorted(refs)
def list_datasources(filter_text: str | None = None) -> list[dict]:
    """
    Summarize catalogue datasources for discovery.

    Args:
        filter_text: Optional case-insensitive substring filter matched against
            publisher_id, datasource_name, or api_endpoint.

    Returns:
        list of dicts: Each entry contains publisher_id (source), gpc_reference_number,
                       start/end/latest years, spatial_resolution, geographical_location,
                       and api_endpoint.
    """
    catalogue = get_catalogue()
    entries = catalogue.get("datasources", []) if isinstance(catalogue, dict) else []
    query = filter_text.lower() if filter_text else None
    summaries: list[dict] = []
    for entry in entries:
        publisher = entry.get("publisher_id", "")
        name = entry.get("datasource_name", "")
        endpoint = entry.get("api_endpoint", "")
        # Skip entries whose searchable fields do not contain the filter text.
        if query and query not in f"{publisher} {name} {endpoint}".lower():
            continue
        summaries.append(
            {
                "publisher_id": publisher,
                "datasource_name": name,
                "gpc_reference_number": entry.get("gpc_reference_number"),
                "start_year": entry.get("start_year"),
                "end_year": entry.get("end_year"),
                "latest_accounting_year": entry.get("latest_accounting_year"),
                "spatial_resolution": entry.get("spatial_resolution"),
                "geographical_location": entry.get("geographical_location"),
                "api_endpoint": endpoint,
            }
        )
    return summaries
def get_source_years(source: str) -> dict | None:
    """
    Look up year coverage for a datasource by matching catalogue entries.

    Args:
        source: Source identifier (matched case-insensitively against publisher_id,
            datasource_name, or api_endpoint).

    Returns:
        dict with start_year, end_year, latest_accounting_year, and gpc_reference_number,
        or None if no match is found.
    """
    catalogue = get_catalogue()
    entries = catalogue.get("datasources", []) if isinstance(catalogue, dict) else []
    target = source.upper()
    for entry in entries:
        publisher = str(entry.get("publisher_id", ""))
        name = str(entry.get("datasource_name", ""))
        endpoint = str(entry.get("api_endpoint", ""))
        # First catalogue entry containing the identifier wins.
        if any(target in field.upper() for field in (publisher, name, endpoint)):
            return {
                "publisher_id": publisher,
                "datasource_name": name,
                "gpc_reference_number": entry.get("gpc_reference_number"),
                "start_year": entry.get("start_year"),
                "end_year": entry.get("end_year"),
                "latest_accounting_year": entry.get("latest_accounting_year"),
                "geographical_location": entry.get("geographical_location"),
            }
    return None