Skip to content
Merged

v2 #4

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,40 @@ env:
IMAGE_NAME: ${{ github.repository_owner }}/gptkit

jobs:
test:
name: Run tests
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Restore pip cache
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-

- name: Install dependencies
run: |
python -m pip install --upgrade pip
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi

- name: Run pytest
run: |
pytest -q

build:

runs-on: ubuntu-latest
needs: test
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
permissions:
contents: read
packages: write
Expand Down
24 changes: 20 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,24 @@ volumes:
```bash
uvicorn app.main:app --reload
```

3. **Tests**:
```bash
curl "http://localhost:8000/domain/whois?domain=example.com"
```

- Quick API smoke test (curl):
```bash
curl "http://localhost:8000/domain/whois?domain=example.com"
```

- Run the unit test suite with pytest (from the project root):
```bash
# activate your virtualenv if you have one, e.g.:
source venv/bin/activate

# install project dependencies (must include pytest / test deps) if needed
pip install -r requirements.txt

# run all tests
pytest -q

# run a single test file
pytest tests/test_whois_parsing.py -q
```
18 changes: 18 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
from fastapi import FastAPI
from app.routers import domain
import logging
import logging.handlers
import os

# Configure root logging to a server.log file located one directory above
# this module (i.e. next to the project root, not inside the app package).
log_dir = os.path.dirname(os.path.abspath(__file__))
log_file = os.path.join(log_dir, "..", "server.log")

# Create file handler capturing everything from DEBUG up.
file_handler = logging.FileHandler(log_file)
file_handler.setLevel(logging.DEBUG)
file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter)

# Configure root logger.  Guard against adding a second handler for the same
# file if this module is imported more than once (e.g. under a test runner or
# an app-factory setup), which would duplicate every log line.
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)
if not any(
    isinstance(h, logging.FileHandler)
    and getattr(h, "baseFilename", None) == os.path.abspath(log_file)
    for h in root_logger.handlers
):
    root_logger.addHandler(file_handler)

app = FastAPI(
title="GPTKit",
Expand Down
105 changes: 101 additions & 4 deletions app/routers/domain.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import Optional
from app.services.cache import WhoisCache
from app.services.whois import WhoisService
from app.services.rate_limiter import RateLimiter
import logging

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/domain", tags=["domain"])

Expand All @@ -14,16 +18,22 @@

class WhoisResponse(BaseModel):
    """Public WHOIS lookup result.

    The raw WHOIS text is intentionally omitted from the public response;
    the parsed fields derived from it (statut, creation_date, registrar,
    pendingDelete, redemptionPeriod) are exposed instead.
    """
    domain: str
    tld: str
    available: bool
    checked_at: str  # ISO-8601 UTC timestamp of when the lookup was cached
    pendingDelete: bool = False
    redemptionPeriod: bool = False
    statut: Optional[str] = None
    creation_date: Optional[str] = None
    registrar: Optional[str] = None

@router.get("/whois", response_model=WhoisResponse)
async def get_whois(
domain: str = Query(..., description="Domain name to check"),
force: int = Query(0, description="Force fresh lookup (1 to force)")
):
logger.info(f"get_whois called for domain={domain}, force={force}")
# 1. Validation
if "." not in domain:
raise HTTPException(
Expand All @@ -36,11 +46,90 @@ async def get_whois(
tld = parts[-1]

# 2. Cache
def parse_whois(raw: str, tld: str):
    """Heuristically extract fields from a raw WHOIS response (any TLD).

    Common WHOIS labels are matched case-insensitively, line by line.
    ``tld`` is accepted for interface stability but is not currently used
    to select TLD-specific parsing rules.

    Returns a dict with keys 'statut', 'creation_date', 'registrar',
    'pendingDelete' and 'redemptionPeriod'.
    """
    import re

    statut = None
    creation_date = None
    registrar = None
    pendingDelete = False
    redemptionPeriod = False

    if raw:
        raw_lines = [l.strip() for l in raw.splitlines() if l.strip()]
        for line in raw_lines:
            l = line.lower()
            # Registrar: (ignore "Registrar WHOIS Server" / "Registrar URL")
            if registrar is None and l.startswith("registrar:") and not ("whois server" in l or "url" in l):
                parts = line.split(":", 1)
                if len(parts) == 2:
                    registrar = parts[1].strip()
                continue
            # Creation date under its many registry-specific labels.
            if creation_date is None and ("creation date" in l or "created on" in l or "created:" in l or "creation:" in l or "registered on" in l):
                parts = line.split(":", 1)
                if len(parts) == 2:
                    creation_date = parts[1].strip()
                continue
            # Status lines (there can be several; keep the first value but
            # scan every one for the pendingDelete / redemptionPeriod flags).
            if "status:" in l or l.startswith("domain status"):
                if statut is None:
                    parts = line.split(":", 1)
                    if len(parts) == 2:
                        statut = parts[1].strip()
                if "pendingdelete" in l:
                    pendingDelete = True
                if "redemptionperiod" in l:
                    redemptionPeriod = True
                continue

        # Fallback for "Registrar <name>" lines written without a colon.
        # Anchored to line start with a lookahead so it can no longer pick up
        # "whois Server" / "URL" from "Registrar WHOIS Server:" style lines
        # (the previous unanchored search did).
        if registrar is None:
            m = re.search(r"^[ \t]*registrar\s+(?!whois\b|url\b)([\w\-\. ]{3,})", raw, re.IGNORECASE | re.MULTILINE)
            if m:
                registrar = m.group(1).strip()

    return {
        "statut": statut,
        "creation_date": creation_date,
        "registrar": registrar,
        "pendingDelete": pendingDelete,
        "redemptionPeriod": redemptionPeriod,
    }

if force != 1:
cached_data = cache.get(domain)
if cached_data:
# enrich from raw before removing it
parsed = parse_whois(cached_data.get("raw"), tld)
# ne pas exposer le champ raw dans la réponse JSON
cached_data.pop("raw", None)
# inject parsed fields so response_model includes them
cached_data.update(parsed)
# ensure coherence: if pendingDelete or redemptionPeriod, available must be False
if cached_data.get("pendingDelete") or cached_data.get("redemptionPeriod"):
cached_data["available"] = False
return cached_data

logger.debug(f"Cache miss or force=1, performing lookup for {domain}")

# 3. Rate Limiting
if not rate_limiter.check(domain):
raise HTTPException(
Expand Down Expand Up @@ -68,6 +157,14 @@ async def get_whois(
# Fetch back to ensure we return exactly what's in the cache (including timestamp)
cached_data = cache.get(domain)
if not cached_data:
raise HTTPException(status_code=500, detail="Failed to retrieve data from cache after save")

raise HTTPException(status_code=500, detail="Failed to retrieve data from cache after save")
# enrich from raw before removing it (comme pour le cache hit)
parsed = parse_whois(cached_data.get("raw"), tld)
cached_data.pop("raw", None)
cached_data.update(parsed)
# ensure coherence: if pendingDelete or redemptionPeriod, available must be False
if cached_data.get("pendingDelete") or cached_data.get("redemptionPeriod"):
cached_data["available"] = False
return cached_data
cached_data.update(parsed)
return cached_data
24 changes: 17 additions & 7 deletions app/services/cache.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import sqlite3
from datetime import datetime, timezone
from typing import Optional, Dict, Any

import logging
import os

logger = logging.getLogger(__name__)

class WhoisCache:
def __init__(self):
self.db_path = "data/whois_cache.db"
Expand All @@ -29,15 +31,23 @@ def get(self, domain: str) -> Optional[Dict[str, Any]]:
cursor = conn.execute("SELECT * FROM whois_cache WHERE domain = ?", (domain,))
row = cursor.fetchone()
if row:
logger.debug(f"Cache HIT for domain: {domain}")
return dict(row)
except sqlite3.Error:
else:
logger.debug(f"Cache MISS for domain: {domain}")
except sqlite3.Error as e:
logger.error(f"Cache error on get({domain}): {e}")
return None
return None

def set(self, domain: str, tld: str, available: bool, raw: str):
    """Insert or replace the cached WHOIS record for *domain*.

    Stores the TLD, availability flag, raw WHOIS text and a UTC
    "checked_at" timestamp (ISO-8601 with a trailing 'Z').  SQLite errors
    are logged and swallowed so a cache failure never breaks the lookup
    path.  (The previous body executed the INSERT twice — once unguarded,
    once inside try/except — a merge/diff artifact removed here.)
    """
    checked_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    try:
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO whois_cache (domain, tld, available, checked_at, raw)
                VALUES (?, ?, ?, ?, ?)
            """, (domain, tld, available, checked_at, raw))
        logger.debug(f"Cache SET for domain: {domain} (checked_at: {checked_at})")
    except sqlite3.Error as e:
        logger.error(f"Cache error on set({domain}): {e}")
2 changes: 1 addition & 1 deletion app/services/whois.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def lookup(self, domain: str) -> str:
try:
# Using -H to suppress legal disclaimers if possible, but standard whois usually just works
result = subprocess.run(
["whois", domain],
["whois", "-h", "whois.verisign-grs.com", domain],
capture_output=True,
text=True,
timeout=self.timeout
Expand Down
Loading