|
1 | 1 | """ |
2 | 2 | API client for CityCatalyst Global API |
3 | | -Base URL: https://ccglobal.openearth.dev |
| 3 | +Base URL is configurable via the GLOBALAPI_BASE_URL environment variable. |
4 | 4 | """ |
| 5 | +import os |
| 6 | + |
5 | 7 | import httpx |
6 | 8 |
|
7 | | -BASE_URL = "https://ccglobal.openearth.dev" |
| 9 | +BASE_URL = os.getenv("GLOBALAPI_BASE_URL", "https://ccglobal.openearth.dev").rstrip("/") |
8 | 10 |
|
9 | 11 |
|
10 | 12 | def get_health() -> dict: |
@@ -72,17 +74,60 @@ def get_catalogue(format: str = None) -> dict: |
72 | 74 | Get the data catalogue from CityCatalyst Global API. |
73 | 75 | |
74 | 76 | Args: |
75 | | - format: Optional format parameter (e.g., "csv") |
| 77 | + format: Optional format parameter (e.g., "csv"). When "csv", returns raw text. |
76 | 78 | |
77 | 79 | Returns: |
78 | | - dict: Catalogue data with list of datasources |
| 80 | + dict or str: Catalogue data with list of datasources, or CSV text if format="csv" |
79 | 81 | """ |
80 | 82 | params = {"format": format} if format else {} |
81 | 83 | response = httpx.get(f"{BASE_URL}/api/v0/catalogue", params=params, timeout=10.0) |
82 | 84 | response.raise_for_status() |
| 85 | + if format and str(format).lower() == "csv": |
| 86 | + # Endpoint returns CSV when format=csv; surface the raw text instead of JSON parsing. |
| 87 | + return response.text |
| 88 | + return response.json() |
| 89 | + |
| 90 | + |
def get_cities_by_country(country_code: str) -> dict:
    """
    Get a list of cities (locodes) for a given country.

    Args:
        country_code: ISO alpha-2 country code (e.g., "BR", "AR"). Surrounding
            whitespace is ignored and the value is percent-encoded before being
            placed in the URL path.

    Returns:
        dict: Parsed JSON response from the /api/v0/ccra/city/{country_code} endpoint.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status
            (raised by ``response.raise_for_status()``).
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import block.
    from urllib.parse import quote

    # Encode the value as a single path segment (safe="" also escapes "/"), so
    # characters such as "/", "?" or "#" cannot alter the requested route.
    path_segment = quote(str(country_code).strip(), safe="")
    response = httpx.get(f"{BASE_URL}/api/v0/ccra/city/{path_segment}", timeout=10.0)
    response.raise_for_status()
    return response.json()
84 | 104 |
|
85 | 105 |
|
def list_available_country_codes(prefer_iso2: bool = True) -> list[str]:
    """
    Derive available country codes from the catalogue.

    Args:
        prefer_iso2: When True, return only 2-letter codes; otherwise include any codes seen.

    Returns:
        Sorted list of unique, upper-cased country codes present in catalogue
        datasources. Entries whose ``geographical_location`` is missing, null,
        or blank are ignored.
    """
    catalogue = get_catalogue()
    # A "datasources" key that is present but null must behave like an empty list.
    datasources = catalogue.get("datasources") if isinstance(catalogue, dict) else None

    codes: set[str] = set()
    for ds in datasources or []:
        if not isinstance(ds, dict):
            continue
        raw_location = ds.get("geographical_location")
        # Check for None before str(): str(None) would yield the bogus code "NONE".
        if raw_location is None:
            continue
        loc = str(raw_location).strip()
        if not loc:
            continue
        # Some entries might have mixed or longer strings; optionally keep ISO2 length only.
        if prefer_iso2 and len(loc) != 2:
            continue
        codes.add(loc.upper())
    return sorted(codes)
| 129 | + |
| 130 | + |
86 | 131 | def get_gpc_reference_numbers_by_source(source: str) -> list: |
87 | 132 | """ |
88 | 133 | Get all GPC reference numbers covered by a particular source. |
@@ -120,3 +165,84 @@ def get_gpc_reference_numbers_by_source(source: str) -> list: |
120 | 165 |
|
121 | 166 | return sorted(list(gpc_refs)) |
122 | 167 |
|
| 168 | + |
def list_datasources(filter_text: str | None = None) -> list[dict]:
    """
    List datasources from the catalogue with key metadata for discovery.

    Args:
        filter_text: Optional case-insensitive filter applied to publisher_id,
            datasource_name, or api_endpoint. Each field is tested on its own;
            a blank or whitespace-only filter is treated as "no filter".

    Returns:
        list of dicts: Each entry contains publisher_id (source), datasource_name,
            gpc_reference_number, start/end/latest years, spatial_resolution,
            geographical_location, and api_endpoint.
    """
    catalogue = get_catalogue()
    # A "datasources" key that is present but null must behave like an empty list.
    datasources = catalogue.get("datasources") if isinstance(catalogue, dict) else None
    needle = filter_text.strip().lower() if filter_text else ""

    results: list[dict] = []
    for ds in datasources or []:
        if not isinstance(ds, dict):
            continue
        publisher_id = ds.get("publisher_id", "")
        datasource_name = ds.get("datasource_name", "")
        api_endpoint = ds.get("api_endpoint", "")

        if needle:
            # Match field by field rather than against one joined string: a joined
            # blob lets a needle span two fields, and renders None as "none".
            searchable = (publisher_id, datasource_name, api_endpoint)
            if not any(
                needle in str(value).lower()
                for value in searchable
                if value is not None
            ):
                continue

        results.append(
            {
                "publisher_id": publisher_id,
                "datasource_name": datasource_name,
                "gpc_reference_number": ds.get("gpc_reference_number"),
                "start_year": ds.get("start_year"),
                "end_year": ds.get("end_year"),
                "latest_accounting_year": ds.get("latest_accounting_year"),
                "spatial_resolution": ds.get("spatial_resolution"),
                "geographical_location": ds.get("geographical_location"),
                "api_endpoint": api_endpoint,
            }
        )

    return results
| 211 | + |
| 212 | + |
| 213 | +def get_source_years(source: str) -> dict | None: |
| 214 | + """ |
| 215 | + Get year coverage for a datasource by matching catalogue entries. |
| 216 | +
|
| 217 | + Args: |
| 218 | + source: Source identifier (matches publisher_id, datasource_name, or api_endpoint). |
| 219 | +
|
| 220 | + Returns: |
| 221 | + dict with start_year, end_year, latest_accounting_year, and gpc_reference_number, |
| 222 | + or None if no match is found. |
| 223 | + """ |
| 224 | + catalogue = get_catalogue() |
| 225 | + datasources = catalogue.get("datasources", []) if isinstance(catalogue, dict) else [] |
| 226 | + source_upper = source.upper() |
| 227 | + |
| 228 | + for ds in datasources: |
| 229 | + publisher_id = str(ds.get("publisher_id", "")) |
| 230 | + datasource_name = str(ds.get("datasource_name", "")) |
| 231 | + api_endpoint = str(ds.get("api_endpoint", "")) |
| 232 | + if ( |
| 233 | + source_upper in publisher_id.upper() |
| 234 | + or source_upper in datasource_name.upper() |
| 235 | + or source_upper in api_endpoint.upper() |
| 236 | + ): |
| 237 | + return { |
| 238 | + "publisher_id": publisher_id, |
| 239 | + "datasource_name": datasource_name, |
| 240 | + "gpc_reference_number": ds.get("gpc_reference_number"), |
| 241 | + "start_year": ds.get("start_year"), |
| 242 | + "end_year": ds.get("end_year"), |
| 243 | + "latest_accounting_year": ds.get("latest_accounting_year"), |
| 244 | + "geographical_location": ds.get("geographical_location"), |
| 245 | + } |
| 246 | + |
| 247 | + return None |
| 248 | + |
0 commit comments