Skip to content

Commit 81c8596

Browse files
feat: enhance globalapi with new features and configurable base URL
1 parent 9d13b69 commit 81c8596

10 files changed

Lines changed: 761 additions & 17 deletions

.dockerignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
.git
2+
__pycache__/
3+
*.pyc
4+
*.pyo
5+
*.pyd
6+
.env
7+
.venv/
8+
venv/
9+
.mypy_cache/
10+
.pytest_cache/
11+
Dockerfile
12+
build/
13+
dist/

.github/workflows/backend.yml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Builds the MCP Global API container image, pushes it to GHCR, and rolls it
# out to the dev EKS cluster.
name: MCP Global API Deployment

on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      # NOTE: '**' already matches every path, so the second entry is redundant;
      # both are kept to preserve the existing trigger definition.
      - '**'
      - '.github/workflows/backend.yml'

permissions:
  contents: read
  packages: write

# Workflow-level variables are inherited by every job and step below.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ghcr.io/open-earth-foundation/mcp-global-api

jobs:
  build:
    runs-on: ubuntu-latest
    outputs:
      # Short commit hash, reused by the deploy job as the image tag.
      image_tag: ${{ steps.vars.outputs.commit_hash }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Get Git commit hash
        id: vars
        run: echo "commit_hash=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: true
          platforms: linux/amd64
          tags: |
            ${{ env.IMAGE_NAME }}:${{ steps.vars.outputs.commit_hash }}
            ${{ env.IMAGE_NAME }}:latest

  deploy:
    needs: build
    runs-on: ubuntu-latest
    env:
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_EKS_DEV_USER }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_EKS_DEV_USER }}
      EKS_DEV_NAME: ${{ secrets.EKS_DEV_NAME }}
      IMAGE_TAG: ${{ needs.build.outputs.image_tag }}
      # IMAGE_NAME is intentionally not redefined here: the `env` context is
      # not available inside `jobs.<job_id>.env`, and the workflow-level
      # IMAGE_NAME is already inherited by this job's steps.
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID_EKS_DEV_USER }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY_EKS_DEV_USER }}
          aws-region: us-east-1

      - name: Creating kubeconfig file
        run: aws eks update-kubeconfig --name "$EKS_DEV_NAME" --region us-east-1

      - name: Testing connection to EKS
        run: kubectl get pods -n default

      - name: Apply Kubernetes manifests
        run: |
          kubectl apply -f k8s/mcp-deployment.yml
          kubectl apply -f k8s/mcp-service.yml

      - name: Update deployment image
        run: |
          # Pin the deployment to the image built for this commit, then wait
          # for the rollout so a failed deploy fails the job.
          kubectl set image deployment/mcp-global-api "mcp-global-api=${IMAGE_NAME}:${IMAGE_TAG}" -n default
          kubectl rollout status deployment/mcp-global-api -n default

Dockerfile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Container image that runs globalapi_mcp_server.py over the HTTP transport.
FROM python:3.12-slim

WORKDIR /app

# Interpreter behaviour: skip .pyc files and keep stdout/stderr unbuffered.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Default runtime settings. GLOBALAPI_BASE_URL is read by
# globalapi_api_client.py; the MCP_* values are presumably read by the server
# entry point (not visible here) - confirm before relying on them.
ENV GLOBALAPI_BASE_URL=https://ccglobal.openearth.dev \
    MCP_HOST=0.0.0.0 \
    MCP_PORT=8000 \
    MCP_PATH=/mcp \
    MCP_TRANSPORT=http

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["python", "globalapi_mcp_server.py", "--transport", "http"]

config.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
openai_model: "gpt-5.1"
2+
mcp_server_url: "https://mcp-global-api.openearth.dev/mcp"
3+
mcp_server_path: "./globalapi_mcp_server.py"

globalapi_api_client.py

Lines changed: 130 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""
22
API client for CityCatalyst Global API
3-
Base URL: https://ccglobal.openearth.dev
3+
Base URL is configurable via the GLOBALAPI_BASE_URL environment variable.
44
"""
5+
import os
6+
57
import httpx
68

7-
BASE_URL = "https://ccglobal.openearth.dev"
9+
BASE_URL = os.getenv("GLOBALAPI_BASE_URL", "https://ccglobal.openearth.dev").rstrip("/")
810

911

1012
def get_health() -> dict:
@@ -72,17 +74,60 @@ def get_catalogue(format: str = None) -> dict:
7274
Get the data catalogue from CityCatalyst Global API.
7375
7476
Args:
75-
format: Optional format parameter (e.g., "csv")
77+
format: Optional format parameter (e.g., "csv"). When "csv", returns raw text.
7678
7779
Returns:
78-
dict: Catalogue data with list of datasources
80+
dict or str: Catalogue data with list of datasources, or CSV text if format="csv"
7981
"""
8082
params = {"format": format} if format else {}
8183
response = httpx.get(f"{BASE_URL}/api/v0/catalogue", params=params, timeout=10.0)
8284
response.raise_for_status()
85+
if format and str(format).lower() == "csv":
86+
# Endpoint returns CSV when format=csv; surface the raw text instead of JSON parsing.
87+
return response.text
88+
return response.json()
89+
90+
91+
def get_cities_by_country(country_code: str) -> dict:
    """
    Get a list of cities (locodes) for a given country.

    Args:
        country_code: ISO alpha-2 country code (e.g., "BR", "AR"). Surrounding
            whitespace is ignored and the value is percent-encoded before it is
            placed in the URL path.

    Returns:
        dict: Parsed JSON response from the /api/v0/ccra/city/{country_code} endpoint.

    Raises:
        httpx.HTTPStatusError: If the API responds with a 4xx/5xx status.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    # Encode the caller-supplied value as a single path segment so reserved
    # characters such as "/", "?" or "#" cannot retarget the request.
    segment = quote(str(country_code).strip(), safe="")
    response = httpx.get(f"{BASE_URL}/api/v0/ccra/city/{segment}", timeout=10.0)
    response.raise_for_status()
    return response.json()
84104

85105

106+
def list_available_country_codes(prefer_iso2: bool = True) -> list[str]:
    """
    Derive available country codes from the catalogue.

    Args:
        prefer_iso2: When True, return only 2-character codes; otherwise include
            every non-empty geographical_location value seen.

    Returns:
        Sorted list of unique, upper-cased country codes present in catalogue
        datasources. Entries whose geographical_location is missing, null, or
        blank are ignored.
    """
    catalogue = get_catalogue()
    datasources = catalogue.get("datasources", []) if isinstance(catalogue, dict) else []
    codes: set[str] = set()
    for ds in datasources:
        raw = ds.get("geographical_location")
        # A JSON null must be skipped explicitly: str(None) would otherwise
        # surface as the bogus code "NONE" when prefer_iso2 is False.
        if raw is None:
            continue
        loc = str(raw).strip()
        if not loc:
            continue
        # Some entries might have mixed or longer strings; optionally keep only
        # values of ISO alpha-2 length.
        if prefer_iso2 and len(loc) != 2:
            continue
        codes.add(loc.upper())
    return sorted(codes)
129+
130+
86131
def get_gpc_reference_numbers_by_source(source: str) -> list:
87132
"""
88133
Get all GPC reference numbers covered by a particular source.
@@ -120,3 +165,84 @@ def get_gpc_reference_numbers_by_source(source: str) -> list:
120165

121166
return sorted(list(gpc_refs))
122167

168+
169+
def list_datasources(filter_text: str | None = None) -> list[dict]:
    """
    Summarise the catalogue's datasources for discovery purposes.

    Args:
        filter_text: Optional substring; when given, only entries whose
            publisher_id, datasource_name, or api_endpoint contain it
            (case-insensitively) are kept.

    Returns:
        list of dicts: One summary per retained datasource with publisher_id,
        datasource_name, gpc_reference_number, start/end/latest years,
        spatial_resolution, geographical_location, and api_endpoint.
    """
    catalogue = get_catalogue()
    if not isinstance(catalogue, dict):
        # Anything other than a JSON object carries no datasource list.
        return []

    wanted = filter_text.lower() if filter_text else None
    # Fields copied straight from the catalogue entry, in output order.
    copied_keys = (
        "gpc_reference_number",
        "start_year",
        "end_year",
        "latest_accounting_year",
        "spatial_resolution",
        "geographical_location",
    )

    summaries: list[dict] = []
    for record in catalogue.get("datasources", []):
        publisher = record.get("publisher_id", "")
        name = record.get("datasource_name", "")
        endpoint = record.get("api_endpoint", "")

        if wanted:
            haystack = f"{publisher} {name} {endpoint}".lower()
            if wanted not in haystack:
                continue

        summary = {"publisher_id": publisher, "datasource_name": name}
        summary.update((key, record.get(key)) for key in copied_keys)
        summary["api_endpoint"] = endpoint
        summaries.append(summary)

    return summaries
211+
212+
213+
def get_source_years(source: str) -> dict | None:
214+
"""
215+
Get year coverage for a datasource by matching catalogue entries.
216+
217+
Args:
218+
source: Source identifier (matches publisher_id, datasource_name, or api_endpoint).
219+
220+
Returns:
221+
dict with start_year, end_year, latest_accounting_year, and gpc_reference_number,
222+
or None if no match is found.
223+
"""
224+
catalogue = get_catalogue()
225+
datasources = catalogue.get("datasources", []) if isinstance(catalogue, dict) else []
226+
source_upper = source.upper()
227+
228+
for ds in datasources:
229+
publisher_id = str(ds.get("publisher_id", ""))
230+
datasource_name = str(ds.get("datasource_name", ""))
231+
api_endpoint = str(ds.get("api_endpoint", ""))
232+
if (
233+
source_upper in publisher_id.upper()
234+
or source_upper in datasource_name.upper()
235+
or source_upper in api_endpoint.upper()
236+
):
237+
return {
238+
"publisher_id": publisher_id,
239+
"datasource_name": datasource_name,
240+
"gpc_reference_number": ds.get("gpc_reference_number"),
241+
"start_year": ds.get("start_year"),
242+
"end_year": ds.get("end_year"),
243+
"latest_accounting_year": ds.get("latest_accounting_year"),
244+
"geographical_location": ds.get("geographical_location"),
245+
}
246+
247+
return None
248+

0 commit comments

Comments
 (0)