Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .DS_Store
Binary file not shown.
39 changes: 39 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Environment
.env
.env.local
.env.*.local

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
myvenv/
venv/
env/
.venv/
*.egg-info/
.pytest_cache/
.mypy_cache/
.ruff_cache/

# Node / Next.js
node_modules/
.next/
out/
build/
dist/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# OS / IDE
.DS_Store
Thumbs.db
.idea/
.vscode/
*.swp
*.swo
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,53 @@ cd backend
python3 -m venv myvenv
source ./myvenv/bin/activate
pip install -r requirements.txt

# (optional) configure backend runtime endpoints / tokens
cp .env.example .env
# edit .env to override defaults - at minimum set OSDG_TOKEN if you plan to use
# the /api/osdg_api endpoint, and GITHUB_TOKEN to raise GitHub API rate limits

python3 app.py
```

#### Configuration

External service endpoints and credentials are read from environment variables
at runtime. See [`backend/.env.example`](backend/.env.example) for the full
list. Supported variables:

| Variable | Default | Purpose |
| --- | --- | --- |
| `AURORA_API_URL` | `https://aurora-sdg.labs.vu.nl/classifier/classify/elsevier-sdg-multi` | Aurora SDG classifier endpoint |
| `OSDG_API_URL` | `http://20.73.166.85/label_text` | OSDG label-text endpoint |
| `GITHUB_API_URL` | `https://api.github.com` | GitHub REST API base |
| `OSDG_TOKEN` | _(empty)_ | Token required by the `/api/osdg_api` endpoint |
| `GITHUB_TOKEN` | _(empty)_ | GitHub PAT to raise rate limit from 60 to 5000 req/hour |
| `HTTP_TIMEOUT_SECONDS` | `30` | Outbound HTTP request timeout in seconds; must be a positive integer |

The app uses [python-dotenv](https://pypi.org/project/python-dotenv/) to load
`backend/.env` automatically when present. Variables already set in the host
environment take precedence.

### 3.2. Frontend Setup

```bash
cd frontend
npm install

# (optional) configure the backend API origin for hosted/staging deployments
cp .env.example .env.local
# edit .env.local if the Flask API is not running at http://127.0.0.1:5000/

npm run dev
```

#### Frontend Configuration

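The Next.js frontend reads `NEXT_PUBLIC_API_BASE_URL` to find the Flask
backend. Because `NEXT_PUBLIC_` variables are inlined into the client bundle
when it is built, set the variable before running `npm run build` (or restart
`npm run dev` after changing it). See
[`frontend/.env.example`](frontend/.env.example). The default remains
`http://127.0.0.1:5000/` for local development.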

### 4. Access the Application

- Frontend: http://localhost:3000
Expand Down
Binary file removed backend/.DS_Store
Binary file not shown.
29 changes: 29 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copy this file to backend/.env and change any values that should differ from
# the defaults. The endpoint URLs default to the previously hardcoded values,
# so a fresh checkout works without a .env file.

# --- External classifier endpoints --------------------------------------

# Aurora SDG classifier (Elsevier-SDG-Multi model).
AURORA_API_URL=https://aurora-sdg.labs.vu.nl/classifier/classify/elsevier-sdg-multi

# OSDG label-text endpoint.
OSDG_API_URL=http://20.73.166.85/label_text

# GitHub REST API base URL.
GITHUB_API_URL=https://api.github.com


# --- Credentials --------------------------------------------------------

# Required when calling /api/osdg_api. Request one from the OSDG team.
OSDG_TOKEN=

# Optional. When set, raises GitHub API rate limit from 60 to 5000 req/hour.
GITHUB_TOKEN=


# --- Request tuning -----------------------------------------------------

# Outbound HTTP request timeout in seconds. Must be a positive integer.
HTTP_TIMEOUT_SECONDS=30
Binary file removed backend/__pycache__/classify.cpython-312.pyc
Binary file not shown.
19 changes: 13 additions & 6 deletions backend/app.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
# import uuid
# import json
import requests
from flask import Flask, jsonify, request, abort
from flask_cors import CORS
from datetime import datetime, UTC
import config
from embedding_description import main as classify_description
from embedding_url import main as classify_url
from aurora_api import main as aurora_classify
Expand Down Expand Up @@ -172,17 +172,24 @@ def osdg_external_api():
if not projectDescription:
return jsonify({'error': 'Project description is required'}), 400

if not config.OSDG_TOKEN:
return jsonify({
"error": "OSDG_TOKEN is not configured",
"message": (
"Set OSDG_TOKEN in backend/.env or the host environment "
"before using the OSDG classifier"
)
}), 503

# Call the external OSDG API
try:
osdg_response = requests.post(
"http://20.73.166.85/label_text",
config.OSDG_API_URL,
json={
"text": projectDescription
},
headers={
"token": os.environ.get("OSDG_TOKEN") # Ensure you have the OSDG token set in your environment variables
},
timeout=1000 # Set a timeout for the request
headers={"token": config.OSDG_TOKEN},
timeout=config.HTTP_TIMEOUT_SECONDS
)
osdg_response.raise_for_status() # Raise an error for bad status codes
osdg_result = osdg_response.json()
Expand Down
7 changes: 5 additions & 2 deletions backend/aurora_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests
import json
import config
from sdg_constants import SDG_LABELS_DICT as SDG_LABELS


Expand All @@ -16,10 +17,12 @@ def main(text: str, project_name: str = None, project_url: str = None):
Dictionary with predictions in standardized format
"""
try:
url = "https://aurora-sdg.labs.vu.nl/classifier/classify/elsevier-sdg-multi"
url = config.AURORA_API_URL
payload = json.dumps({"text": text})
headers = {'Content-Type': 'application/json'}
response = requests.request("POST", url, headers=headers, data=payload)
response = requests.request(
"POST", url, headers=headers, data=payload, timeout=config.HTTP_TIMEOUT_SECONDS
)
# response.raise_for_status()

raw_result = response.json()
Expand Down
78 changes: 78 additions & 0 deletions backend/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Centralized runtime configuration for the UN-SDG classifier backend.

External service endpoints and credentials are read from environment variables
so deployments can be reconfigured without code changes. Defaults match the
historical hardcoded values so existing local setups keep working.

See ``backend/.env.example`` for the full list of supported variables.
"""

import os
from pathlib import Path


# Directory that contains this module; the optional .env file lives beside it.
BACKEND_DIR = Path(__file__).resolve().parent
ENV_FILE = BACKEND_DIR / ".env"

try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional; environment variables set by the host
    # process are still honored.
    pass
else:
    # Only the import above is guarded. An ImportError raised while actually
    # loading the file would point at a broken installation and should
    # surface instead of being silently ignored.
    # Load a .env file from the backend/ directory if one exists.
    # Safe to call when the file is absent.
    load_dotenv(ENV_FILE)


def _get(name: str, default: str = "") -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset, empty, or
            consists only of whitespace.

    Returns:
        The stripped value, or *default* when no usable value is present.
    """
    cleaned = (os.environ.get(name) or "").strip()
    return cleaned or default


def _get_positive_int(name: str, default: int) -> int:
    """Read environment variable *name* as a strictly positive integer.

    Args:
        name: Name of the environment variable to read.
        default: Value used when the variable is unset or blank. It goes
            through the same parsing and range check as an explicit value.

    Returns:
        The parsed integer, guaranteed to be greater than zero.

    Raises:
        ValueError: If the value cannot be parsed by ``int()`` or is not
            greater than zero.
    """
    text = _get(name, str(default))
    try:
        number = int(text)
    except ValueError as exc:
        raise ValueError(f"{name} must be an integer number of seconds") from exc

    if number > 0:
        return number
    raise ValueError(f"{name} must be greater than 0")


# --- External classifier endpoints ----------------------------------------

# URL that aurora_api.main() POSTs the text payload to.
AURORA_API_URL: str = _get(
"AURORA_API_URL",
"https://aurora-sdg.labs.vu.nl/classifier/classify/elsevier-sdg-multi",
)

# URL that the OSDG handler in app.py POSTs the project description to.
# NOTE(review): the default is plain HTTP to a bare IP address, and the
# caller sends OSDG_TOKEN in a request header to this URL - confirm whether
# an HTTPS endpoint is available.
OSDG_API_URL: str = _get(
"OSDG_API_URL",
"http://20.73.166.85/label_text",
)

# Base URL for GitHub REST calls. Trailing slashes are stripped because
# embedding_url.py builds request URLs as ``GITHUB_API + path``
# (presumably with paths that start with "/" - verify against callers).
GITHUB_API_URL: str = _get(
"GITHUB_API_URL",
"https://api.github.com",
).rstrip("/")


# --- Credentials ----------------------------------------------------------

# Optional. Required only when using the OSDG endpoint.
# Empty string when unset; app.py answers 503 in that case instead of
# calling the external service.
OSDG_TOKEN: str = _get("OSDG_TOKEN")

# Optional. When set, GitHub API requests are authenticated, raising the
# rate limit from 60 to 5000 requests/hour.
# Empty string when unset, in which case no Authorization header is sent.
GITHUB_TOKEN: str = _get("GITHUB_TOKEN")


# --- Request tuning -------------------------------------------------------

# Timeout (seconds) for outbound HTTP requests to external services.
# Validated at import time: a non-integer or non-positive value raises
# ValueError while this module is being imported.
HTTP_TIMEOUT_SECONDS: int = _get_positive_int("HTTP_TIMEOUT_SECONDS", 30)
13 changes: 6 additions & 7 deletions backend/embedding_url.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import re
import base64
import requests
Expand All @@ -7,10 +6,11 @@
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import numpy as np
import config
from sdg_constants import SDG_LABELS, SDG_NAMES, SDG_DESCS

# --- GitHub fetch utilities ---
GITHUB_API = "https://api.github.com"
GITHUB_API = config.GITHUB_API_URL

def parse_repo(url: str) -> Tuple[str, str]:
"""
Expand Down Expand Up @@ -44,15 +44,14 @@ def parse_repo(url: str) -> Tuple[str, str]:

def gh_get(path: str, params: dict = None, accept_preview: bool = False) -> dict:
headers = {"User-Agent": "sdg-classifier"}
token = os.environ.get("GITHUB_TOKEN")
if token:
headers["Authorization"] = f"Bearer {token}"
if config.GITHUB_TOKEN:
headers["Authorization"] = f"Bearer {config.GITHUB_TOKEN}"
if accept_preview:
# topics API requires a custom media type on some API versions
headers["Accept"] = "application/vnd.github.mercy-preview+json, application/vnd.github+json"
else:
headers["Accept"] = "application/vnd.github+json"
r = requests.get(GITHUB_API + path, headers=headers, params=params, timeout=30)
r = requests.get(GITHUB_API + path, headers=headers, params=params, timeout=config.HTTP_TIMEOUT_SECONDS)
r.raise_for_status()
return r.json()

Expand Down Expand Up @@ -216,4 +215,4 @@ def main(url: str):

# if __name__ == "__main__":
# url = "https://github.com/processing/p5.js"
# main(url)
# main(url)
3 changes: 2 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ numpy
accelerate
flask
flask-cors
Flask-UUID
Flask-UUID
python-dotenv
1 change: 1 addition & 0 deletions backend/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading