diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 006eee1..7701802 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -11,9 +11,40 @@ env: IMAGE_NAME: ${{ github.repository_owner }}/gptkit jobs: + test: + name: Run tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Restore pip cache + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi + + - name: Run pytest + run: | + pytest -q + build: runs-on: ubuntu-latest + needs: test + if: github.event_name == 'push' && github.ref == 'refs/heads/main' permissions: contents: read packages: write diff --git a/README.md b/README.md index 03769e2..240b9f2 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,24 @@ volumes: ```bash uvicorn app.main:app --reload ``` - 3. 
**Tests**: - ```bash - curl "http://localhost:8000/domain/whois?domain=example.com" - ``` + +- Quick API smoke test (curl): + ```bash + curl "http://localhost:8000/domain/whois?domain=example.com" + ``` + +- Run the unit test suite with pytest (from the project root): + ```bash + # activate your virtualenv if you have one, e.g.: + source venv/bin/activate + + # install test/dev dependencies if needed + pip install -r requirements-dev.txt + + # run all tests + pytest -q + + # run a single test file + pytest tests/test_whois_parsing.py -q + ``` diff --git a/app/main.py b/app/main.py index b2fd339..cc5e285 100644 --- a/app/main.py +++ b/app/main.py @@ -1,5 +1,23 @@ from fastapi import FastAPI from app.routers import domain +import logging +import logging.handlers +import os + +# Configure logging to server.log +log_dir = os.path.dirname(os.path.abspath(__file__)) +log_file = os.path.join(log_dir, "..", "server.log") + +# Create file handler +file_handler = logging.FileHandler(log_file) +file_handler.setLevel(logging.DEBUG) +file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') +file_handler.setFormatter(file_formatter) + +# Configure root logger +root_logger = logging.getLogger() +root_logger.setLevel(logging.DEBUG) +root_logger.addHandler(file_handler) app = FastAPI( title="GPTKit", diff --git a/app/routers/domain.py b/app/routers/domain.py index cdd89f8..64ba470 100644 --- a/app/routers/domain.py +++ b/app/routers/domain.py @@ -1,8 +1,12 @@ from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel +from typing import Optional from app.services.cache import WhoisCache from app.services.whois import WhoisService from app.services.rate_limiter import RateLimiter +import logging + +logger = logging.getLogger(__name__) router = APIRouter(prefix="/domain", tags=["domain"]) @@ -14,16 +18,22 @@ class WhoisResponse(BaseModel): domain: str + checked_at: str tld: str available: bool - checked_at: str - raw: str + 
pendingDelete: bool = False + redemptionPeriod: bool = False + statut: Optional[str] = None + creation_date: Optional[str] = None + registrar: Optional[str] = None + # raw is intentionally omitted from the public response @router.get("/whois", response_model=WhoisResponse) async def get_whois( domain: str = Query(..., description="Domain name to check"), force: int = Query(0, description="Force fresh lookup (1 to force)") ): + logger.info(f"get_whois called for domain={domain}, force={force}") # 1. Validation if "." not in domain: raise HTTPException( @@ -36,11 +46,90 @@ async def get_whois( tld = parts[-1] # 2. Cache + def parse_whois(raw: str, tld: str): + """Extract statut, creation_date, registrar, pendingDelete, redemptionPeriod for all TLDs. + + This is heuristic: we search common WHOIS labels case-insensitively. + Returns a dict with keys 'statut', 'creation_date', 'registrar', 'pendingDelete', 'redemptionPeriod'. + """ + if not raw: + return { + "statut": None, + "creation_date": None, + "registrar": None, + "pendingDelete": False, + "redemptionPeriod": False, + } + + raw_lines = [l.strip() for l in raw.splitlines() if l.strip()] + lower = raw.lower() + + statut = None + creation_date = None + registrar = None + pendingDelete = False + redemptionPeriod = False + + import re + + # Common patterns (now generalized for all TLDs) + for line in raw_lines: + l = line.lower() + # Registrar: (ignore Registrar WHOIS Server and Registrar URL) + if registrar is None and l.startswith("registrar:") and not ("whois server" in l or "url" in l): + parts = line.split(":", 1) + if len(parts) == 2: + registrar = parts[1].strip() + continue + # Creation date + if creation_date is None and ("creation date" in l or "created on" in l or "created:" in l or "creation:" in l or "registered on" in l): + parts = line.split(":", 1) + if len(parts) == 2: + creation_date = parts[1].strip() + continue + # Status lines (can have multiple) + if "status:" in l or l.startswith("domain 
status"): + if statut is None: + parts = line.split(":", 1) + if len(parts) == 2: + statut = parts[1].strip() + # Check for pendingDelete and redemptionPeriod in any status line + if "pendingdelete" in l: + pendingDelete = True + if "redemptionperiod" in l: + redemptionPeriod = True + continue + + # Fallback regex for Registrar lines like 'Registrar Name' without colon + if registrar is None: + m = re.search(r"registrar\s+([\w\-\. ]{3,})", raw, re.IGNORECASE) + if m: + registrar = m.group(1).strip() + + return { + "statut": statut, + "creation_date": creation_date, + "registrar": registrar, + "pendingDelete": pendingDelete, + "redemptionPeriod": redemptionPeriod, + } + if force != 1: cached_data = cache.get(domain) if cached_data: + # enrich from raw before removing it + parsed = parse_whois(cached_data.get("raw"), tld) + # ne pas exposer le champ raw dans la réponse JSON + cached_data.pop("raw", None) + # inject parsed fields so response_model includes them + cached_data.update(parsed) + # ensure coherence: if pendingDelete or redemptionPeriod, available must be False + if cached_data.get("pendingDelete") or cached_data.get("redemptionPeriod"): + cached_data["available"] = False return cached_data + logger.debug(f"Cache miss or force=1, performing lookup for {domain}") + # 3. 
Rate Limiting if not rate_limiter.check(domain): raise HTTPException( @@ -68,6 +157,14 @@ async def get_whois( # Fetch back to ensure we return exactly what's in the cache (including timestamp) cached_data = cache.get(domain) if not cached_data: - raise HTTPException(status_code=500, detail="Failed to retrieve data from cache after save") - + raise HTTPException(status_code=500, detail="Failed to retrieve data from cache after save") + # enrich from raw before removing it (comme pour le cache hit) + parsed = parse_whois(cached_data.get("raw"), tld) + cached_data.pop("raw", None) + cached_data.update(parsed) + # ensure coherence: if pendingDelete or redemptionPeriod, available must be False + if cached_data.get("pendingDelete") or cached_data.get("redemptionPeriod"): + cached_data["available"] = False + return cached_data + cached_data.update(parsed) return cached_data diff --git a/app/services/cache.py b/app/services/cache.py index 7043499..87e2b90 100644 --- a/app/services/cache.py +++ b/app/services/cache.py @@ -1,9 +1,11 @@ import sqlite3 from datetime import datetime, timezone from typing import Optional, Dict, Any - +import logging import os +logger = logging.getLogger(__name__) + class WhoisCache: def __init__(self): self.db_path = "data/whois_cache.db" @@ -29,15 +31,23 @@ def get(self, domain: str) -> Optional[Dict[str, Any]]: cursor = conn.execute("SELECT * FROM whois_cache WHERE domain = ?", (domain,)) row = cursor.fetchone() if row: + logger.debug(f"Cache HIT for domain: {domain}") return dict(row) - except sqlite3.Error: + else: + logger.debug(f"Cache MISS for domain: {domain}") + except sqlite3.Error as e: + logger.error(f"Cache error on get({domain}): {e}") return None return None def set(self, domain: str, tld: str, available: bool, raw: str): checked_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") - with sqlite3.connect(self.db_path) as conn: - conn.execute(""" - INSERT OR REPLACE INTO whois_cache (domain, tld, available, 
checked_at, raw) - VALUES (?, ?, ?, ?, ?) - """, (domain, tld, available, checked_at, raw)) + try: + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + INSERT OR REPLACE INTO whois_cache (domain, tld, available, checked_at, raw) + VALUES (?, ?, ?, ?, ?) + """, (domain, tld, available, checked_at, raw)) + logger.debug(f"Cache SET for domain: {domain} (checked_at: {checked_at})") + except sqlite3.Error as e: + logger.error(f"Cache error on set({domain}): {e}") diff --git a/app/services/whois.py b/app/services/whois.py index 6a24b7a..0a01c51 100644 --- a/app/services/whois.py +++ b/app/services/whois.py @@ -11,7 +11,7 @@ def lookup(self, domain: str) -> str: try: # Using -H to suppress legal disclaimers if possible, but standard whois usually just works result = subprocess.run( - ["whois", domain], + ["whois", "-h", "whois.verisign-grs.com", domain], capture_output=True, text=True, timeout=self.timeout diff --git a/gptkit-whois-openapi.json b/gptkit-whois-openapi.json new file mode 100644 index 0000000..b277c3e --- /dev/null +++ b/gptkit-whois-openapi.json @@ -0,0 +1,112 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "GPTKit WHOIS API", + "version": "1.0.0" + }, + "servers": [ + { + "url": "https://gptkit.guillaumeduveau.com" + } + ], + "paths": { + "/domain/whois": { + "get": { + "operationId": "whoisLookup", + "summary": "WHOIS lookup for a domain", + "description": "Check WHOIS information and availability status for a single domain.", + "parameters": [ + { + "name": "domain", + "in": "query", + "required": true, + "description": "Full domain name including TLD (e.g. example.com, monsite.fr).", + "schema": { + "type": "string" + } + }, + { + "name": "force", + "in": "query", + "required": false, + "description": "If 1, bypass cache and force a fresh WHOIS lookup. 
Default is 0.", + "schema": { + "type": "integer", + "enum": [0, 1], + "default": 0 + } + } + ], + "responses": { + "200": { + "description": "Successful WHOIS lookup", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "domain": { "type": "string" }, + "checked_at": { "type": "string", "format": "date-time" }, + "tld": { "type": "string" }, + "available": { "type": "boolean" }, + "pendingDelete": { "type": "boolean" }, + "redemptionPeriod": { "type": "boolean" }, + "statut": { "type": "string", "nullable": true }, + "creation_date": { "type": "string", "nullable": true }, + "registrar": { "type": "string", "nullable": true } + }, + "required": ["domain", "checked_at", "tld", "available", "pendingDelete", "redemptionPeriod"] + } + } + } + }, + "400": { + "description": "Invalid domain", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { "type": "string", "example": "invalid_domain" }, + "message": { "type": "string" } + }, + "required": ["error", "message"] + } + } + } + }, + "429": { + "description": "Rate limit exceeded", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { "type": "string", "example": "rate_limited" }, + "message": { "type": "string" } + }, + "required": ["error", "message"] + } + } + } + }, + "500": { + "description": "WHOIS error or internal error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { "type": "string", "example": "whois_error" }, + "message": { "type": "string" } + }, + "required": ["error", "message"] + } + } + } + } + } + } + } + } +} diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..3a28dd0 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,3 @@ +# Development dependencies +-r requirements.txt +pytest diff --git a/specs.md b/specs.md deleted file mode 100644 index fadd879..0000000 --- 
a/specs.md +++ /dev/null @@ -1,227 +0,0 @@ -# GPTKit — Specification du module WHOIS - -Ce document définit **la version propre et finale** du module WHOIS de **GPTKit**, en repartant de zéro. -Aucun code, aucune technologie imposée : uniquement le **contrat**, le **comportement**, et les **contraintes**. - -GPTKit est destiné **exclusivement** à être utilisé via un **Custom GPT web** (Actions HTTP / OpenAPI). - ---- - -# 1. Concept global — GPTKit - -GPTKit est un **backend unique**, organisé en *namespaces*, destiné à regrouper plusieurs outils consommables par un Custom GPT via des Actions HTTP. - -Namespaces prévus : -- `/domain/*` — outils liés aux domaines (WHOIS, DNS, HTTP check, SSL…) -- `/seo/*` — outils SEO (futur) -- `/utils/*` — utilitaires divers (futur) - -## Contraintes générales -- Un **seul service HTTP**, un seul host/port. -- **JSON uniquement** en réponse. -- Pas de HTML, pas de streaming, pas de WebSocket. -- Endpoints **idempotents** pour les lectures (GET). -- Chaque endpoint doit être **simple à décrire dans OpenAPI**. - ---- - -# 2. Module WHOIS — Aperçu - -Le module WHOIS fournit **un seul endpoint** permettant : -- une lookup WHOIS fiable -- une détermination simple de disponibilité -- un cache persistant -- un contrôle du rate limiting -- un comportement déterministe pour GPT - -Ce module est exposé sous : -``` -GET /domain/whois -``` - ---- - -# 3. API WHOIS — Détails complets - -## 3.1. Paramètres (query) - -### `domain` (string, requis) -- Doit contenir un domaine complet : `example.com`, `monsite.fr`, etc. -- Doit contenir au moins un `.`. -- Exemples invalides : `habitatleger`, `domain`, `test`. - -### `force` (integer, optionnel, défaut = 0) -- `0` : utiliser le cache si existant. -- `1` : ignorer le cache et exécuter un WHOIS frais. - ---- - -## 3.2. Logique interne - -### Étape 1 — Validation -- Vérifier la présence de `domain`. -- Vérifier qu’il contient au moins un `.`. 
-- Extraire le TLD comme la sous-chaîne après le dernier `.`. - -### Étape 2 — Cache persistant -- Si `domain` est dans le cache ET `force != 1` : - - Retourner immédiatement le contenu du cache. - - **Aucun rate limiting appliqué**. - - **Aucun WHOIS exécuté**. - -### Étape 3 — Rate limiting (si WHOIS requis) -- Appliquer un **rate limit global** (ex : par minute / par heure). -- Appliquer un **rate limit par domaine** (anti-spam via `force=1`). -- En cas de dépassement : renvoyer **HTTP 429**. - -### Étape 4 — Exécution WHOIS -- Exécuter **une seule** commande WHOIS système. -- Timeout strict recommandé (ex : 5s). -- Ne jamais re-tenter automatiquement. - -### Étape 5 — Détermination disponibilité -Patterns minimaux : -- `.com` : "No match", "NOT FOUND" -- `.fr` : "Status: FREE", "No entries found" -- autres TLD : variantes similaires ("NOT FOUND", etc.) - -Si match → `available = true` -Sinon → `available = false` - -### Étape 6 — Mise à jour du cache -- Écraser l’entrée précédente. -- Stocker `domain`, `tld`, `available`, `checked_at`, `raw`. - -### Étape 7 — Réponse JSON -- Retourner la réponse formelle décrite ci-dessous. - ---- - -# 4. Réponse (succès) - -```json -{ - "domain": "example.com", - "tld": "com", - "available": true, - "checked_at": "2025-01-01T12:00:00Z", - "raw": "raw whois output here..." -} -``` - -Signification : -- `domain` : domaine demandé -- `tld` : TLD extrait -- `available` : booléen basé sur patterns -- `checked_at` : timestamp ISO-8601 UTC -- `raw` : WHOIS brut (tronqué si nécessaire) - ---- - -# 5. Réponses d’erreur - -## 400 — domaine invalide -```json -{ - "error": "invalid_domain", - "message": "Domain must include a TLD (example: site.com)." -} -``` - -## 429 — rate limit -```json -{ - "error": "rate_limited", - "message": "WHOIS rate limit exceeded." -} -``` - -## 500 — WHOIS / interne -```json -{ - "error": "whois_error", - "message": "WHOIS lookup failed or timed out." -} -``` - ---- - -# 6. 
Cache — Spécification - -Propriétés : -- **Persistant** entre redémarrages. -- Pas d’expiration automatique. -- **Jamais** utilisé si `force = 1`. -- Format interne suggéré : - -``` -{ - "domain": "example.com", - "tld": "com", - "available": true, - "checked_at": "...", - "raw": "..." -} -``` - -- Les lectures depuis le cache **n’utilisent pas le rate limit**. - ---- - -# 7. Rate Limiting — Spécification - -S’applique **uniquement** lorsqu’un WHOIS doit être exécuté. - -## Deux niveaux : - -### 1. Rate limit global -- Ex : nombre max WHOIS/minute ou WHOIS/heure. -- Valeurs exactes à définir côté implémentation. - -### 2. Rate limit par domaine -- Empêche l’abus de `force=1`. - -## En cas de dépassement : -- Retourner HTTP `429` + JSON. -- Ne pas exécuter WHOIS. - ---- - -# 8. Intégration dans GPTKit - -WHOIS devient l’un des modules de : -``` -/domain/whois -/domain/dns (futur) -/domain/http (futur) -/domain/ssl (futur) -``` - -Les Custom GPT web pourront déclarer **plusieurs Actions HTTP**, chacune ciblant une partie des endpoints de GPTKit. - ---- - -# 9. Non-objectifs - -Ne pas implémenter dans ce module : - -- résolution DNS -- batch WHOIS -- scan multi-TLD -- UI / HTML -- streaming / websockets -- jobs automatiques -- authentification -- interactions registrar - ---- - -# 10. 
Résumé final - -- Endpoint : `GET /domain/whois` -- Params : `domain` (requis), `force` (optionnel) -- Cache persistant + rate limiting seulement si WHOIS exécuté -- JSON propre et stable -- Compatible OpenAPI / Actions Custom GPT -- Module strictement délimité, prêt à être implémenté - diff --git a/tests/data/whois-assiste.com b/tests/data/whois-assiste.com new file mode 100644 index 0000000..9cce4cf --- /dev/null +++ b/tests/data/whois-assiste.com @@ -0,0 +1,57 @@ + Domain Name: ASSISTE.COM + Registry Domain ID: 103574586_DOMAIN_COM-VRSN + Registrar WHOIS Server: whois.gandi.net + Registrar URL: http://www.gandi.net + Updated Date: 2025-11-29T12:21:45Z + Creation Date: 2003-09-15T11:32:57Z + Registry Expiry Date: 2025-09-15T11:32:57Z + Registrar: Gandi SAS + Registrar IANA ID: 81 + Registrar Abuse Contact Email: abuse@support.gandi.net + Registrar Abuse Contact Phone: +33.170377661 + Domain Status: clientHold https://icann.org/epp#clientHold + Domain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited + Domain Status: pendingDelete https://icann.org/epp#pendingDelete + Name Server: DNS108.OVH.NET + Name Server: NS108.OVH.NET + DNSSEC: unsigned + URL of the ICANN Whois Inaccuracy Complaint Form: https://www.icann.org/wicf/ +>>> Last update of whois database: 2025-12-03T11:38:03Z <<< + +For more information on Whois status codes, please visit https://icann.org/epp + +NOTICE: The expiration date displayed in this record is the date the +registrar's sponsorship of the domain name registration in the registry is +currently set to expire. This date does not necessarily reflect the expiration +date of the domain name registrant's agreement with the sponsoring +registrar. Users may consult the sponsoring registrar's Whois database to +view the registrar's reported date of expiration for this registration. 
+ +TERMS OF USE: You are not authorized to access or query our Whois +database through the use of electronic processes that are high-volume and +automated except as reasonably necessary to register domain names or +modify existing registrations; the Data in VeriSign Global Registry +Services' ("VeriSign") Whois database is provided by VeriSign for +information purposes only, and to assist persons in obtaining information +about or related to a domain name registration record. VeriSign does not +guarantee its accuracy. By submitting a Whois query, you agree to abide +by the following terms of use: You agree that you may use this Data only +for lawful purposes and that under no circumstances will you use this Data +to: (1) allow, enable, or otherwise support the transmission of mass +unsolicited, commercial advertising or solicitations via e-mail, telephone, +or facsimile; or (2) enable high volume, automated, electronic processes +that apply to VeriSign (or its computer systems). The compilation, +repackaging, dissemination or other use of this Data is expressly +prohibited without the prior written consent of VeriSign. You agree not to +use electronic processes that are automated and high-volume to access or +query the Whois database except as reasonably necessary to register +domain names or modify existing registrations. VeriSign reserves the right +to restrict your access to the Whois database in its sole discretion to ensure +operational stability. VeriSign may restrict or terminate your access to the +Whois database for failure to abide by these terms of use. VeriSign +reserves the right to modify these terms at any time. + +The Registry database contains ONLY .COM, .NET, .EDU domains and +Registrars. 
+# Not found +--- ~ \ No newline at end of file diff --git a/tests/data/whois-cadeaux.com b/tests/data/whois-cadeaux.com new file mode 100644 index 0000000..ccf401b --- /dev/null +++ b/tests/data/whois-cadeaux.com @@ -0,0 +1,55 @@ + Domain Name: CADEAUX.COM + Registry Domain ID: 86531033_DOMAIN_COM-VRSN + Registrar WHOIS Server: whois.ovh.com + Registrar URL: http://www.ovh.com + Updated Date: 2025-05-14T07:32:09Z + Creation Date: 2002-05-13T18:12:06Z + Registry Expiry Date: 2026-05-13T18:12:06Z + Registrar: OVH sas + Registrar IANA ID: 433 + Registrar Abuse Contact Email: abuse@ovh.net + Registrar Abuse Contact Phone: +33.972101007 + Domain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited + Domain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited + Name Server: DNS200.ANYCAST.ME + Name Server: NS200.ANYCAST.ME + DNSSEC: signedDelegation + DNSSEC DS Data: 54041 8 2 97EA5EC577E49AE10B1991BFE25CFE67DD30C63EA383E423C6ED0F15700D1CDE + URL of the ICANN Whois Inaccuracy Complaint Form: https://www.icann.org/wicf/ +>>> Last update of whois database: 2025-12-03T11:29:43Z <<< + +For more information on Whois status codes, please visit https://icann.org/epp + +NOTICE: The expiration date displayed in this record is the date the +registrar's sponsorship of the domain name registration in the registry is +currently set to expire. This date does not necessarily reflect the expiration +date of the domain name registrant's agreement with the sponsoring +registrar. Users may consult the sponsoring registrar's Whois database to +view the registrar's reported date of expiration for this registration. 
+ +TERMS OF USE: You are not authorized to access or query our Whois +database through the use of electronic processes that are high-volume and +automated except as reasonably necessary to register domain names or +modify existing registrations; the Data in VeriSign Global Registry +Services' ("VeriSign") Whois database is provided by VeriSign for +information purposes only, and to assist persons in obtaining information +about or related to a domain name registration record. VeriSign does not +guarantee its accuracy. By submitting a Whois query, you agree to abide +by the following terms of use: You agree that you may use this Data only +for lawful purposes and that under no circumstances will you use this Data +to: (1) allow, enable, or otherwise support the transmission of mass +unsolicited, commercial advertising or solicitations via e-mail, telephone, +or facsimile; or (2) enable high volume, automated, electronic processes +that apply to VeriSign (or its computer systems). The compilation, +repackaging, dissemination or other use of this Data is expressly +prohibited without the prior written consent of VeriSign. You agree not to +use electronic processes that are automated and high-volume to access or +query the Whois database except as reasonably necessary to register +domain names or modify existing registrations. VeriSign reserves the right +to restrict your access to the Whois database in its sole discretion to ensure +operational stability. VeriSign may restrict or terminate your access to the +Whois database for failure to abide by these terms of use. VeriSign +reserves the right to modify these terms at any time. + +The Registry database contains ONLY .COM, .NET, .EDU domains and +Registrars. 
"""Unit tests for the heuristic WHOIS field extraction (tests/test_whois_parsing.py).

NOTE(review): ``parse_whois`` below is a local copy of the helper that is
nested inside ``get_whois`` in app/routers/domain.py, so these tests do not
exercise the production code path directly. Extracting the helper to module
level and importing it here would remove the duplication -- TODO confirm and
port the fixes made in this copy.
"""
import os
import re

import pytest

# Substrings that introduce a creation date; matched case-insensitively
# anywhere in the line.
_CREATION_LABELS = (
    "creation date",
    "created on",
    "created:",
    "creation:",
    "registered on",
)

# Colon-less "Registrar <name>" line. The character class excludes ':' so,
# combined with fullmatch(), labelled sub-fields such as
# "Registrar WHOIS Server: ..." or "Registrar URL: ..." can never match.
_REGISTRAR_NO_COLON = re.compile(r"registrar[ \t]+([\w\-\. ]{3,})", re.IGNORECASE)

_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")


def _field_value(line):
    """Return the stripped text after the first ':' in *line*, or None if absent or empty."""
    _, _, value = line.partition(":")
    return value.strip() or None


def parse_whois(raw: str, tld: str) -> dict:
    """Extract a few well-known fields from raw WHOIS output.

    The parsing is heuristic: common WHOIS labels are searched
    case-insensitively, line by line.

    Args:
        raw: Raw WHOIS text. Empty or ``None`` yields the default result.
        tld: Top-level domain of the queried name. Currently unused; kept so
            the signature matches the helper in app/routers/domain.py.

    Returns:
        A dict with the keys ``statut``, ``creation_date``, ``registrar``
        (each ``str`` or ``None``) and ``pendingDelete``, ``redemptionPeriod``
        (``bool``). Only the first non-empty registrar, creation date and
        status value are kept; the two flags are set if *any* status line
        mentions them.
    """
    fields = {
        "statut": None,
        "creation_date": None,
        "registrar": None,
        "pendingDelete": False,
        "redemptionPeriod": False,
    }
    if not raw:
        return fields

    lines = [text for text in (ln.strip() for ln in raw.splitlines()) if text]

    for line in lines:
        lowered = line.lower()

        # The "registrar:" prefix alone rules out "Registrar URL:" and
        # "Registrar WHOIS Server:"; filtering on the whole line would also
        # reject registrars whose *name* contains "url" (e.g. "Curl ...").
        if fields["registrar"] is None and lowered.startswith("registrar:"):
            # An empty value leaves the field unset so a later line can fill it.
            fields["registrar"] = _field_value(line)
            continue

        if fields["creation_date"] is None and any(
            label in lowered for label in _CREATION_LABELS
        ):
            fields["creation_date"] = _field_value(line)
            continue

        # Several status lines may be present: keep the first value, but
        # inspect every one of them for the two lifecycle flags.
        if "status:" in lowered or lowered.startswith("domain status"):
            if fields["statut"] is None:
                fields["statut"] = _field_value(line)
            if "pendingdelete" in lowered:
                fields["pendingDelete"] = True
            if "redemptionperiod" in lowered:
                fields["redemptionPeriod"] = True

    # Fallback for registries that print "Registrar <name>" without a colon.
    if fields["registrar"] is None:
        for line in lines:
            match = _REGISTRAR_NO_COLON.fullmatch(line)
            if match:
                fields["registrar"] = match.group(1).strip()
                break

    return fields


def _load_fixture(name):
    """Return the text of tests/data/<name>, skipping the test if the file is absent."""
    path = os.path.join(_DATA_DIR, name)
    if not os.path.exists(path):
        pytest.skip(f"WHOIS fixture not available: {path}")
    with open(path, encoding="utf-8") as handle:
        return handle.read()


def test_parse_whois_cadeaux_com():
    result = parse_whois(_load_fixture("whois-cadeaux.com"), "com")
    assert result["statut"] is not None, f"statut should not be None, got {result['statut']}"
    assert result["creation_date"] == "2002-05-13T18:12:06Z"
    assert result["registrar"] == "OVH sas"
    assert result["pendingDelete"] is False
    assert result["redemptionPeriod"] is False


def test_parse_whois_assiste_com():
    result = parse_whois(_load_fixture("whois-assiste.com"), "com")
    assert result["statut"] is not None, f"statut should not be None, got {result['statut']}"
    assert result["creation_date"] == "2003-09-15T11:32:57Z"
    assert result["registrar"] == "Gandi SAS"
    assert result["pendingDelete"] is True, f"pendingDelete should be True, got {result['pendingDelete']}"
    assert result["redemptionPeriod"] is False


def test_parse_whois_empty_input():
    assert parse_whois("", "com") == {
        "statut": None,
        "creation_date": None,
        "registrar": None,
        "pendingDelete": False,
        "redemptionPeriod": False,
    }


def test_parse_whois_registrar_name_containing_url():
    result = parse_whois("Registrar: Curl Domains LLC\n", "com")
    assert result["registrar"] == "Curl Domains LLC"


def test_parse_whois_fallback_ignores_registrar_sublabels():
    raw = "Registrar WHOIS Server: whois.gandi.net\nRegistrar URL: http://www.gandi.net\n"
    assert parse_whois(raw, "com")["registrar"] is None