From d2ccdf47f49233a78c2a91cf8d667f46a34f36f7 Mon Sep 17 00:00:00 2001
From: Lars Kroehl <kersten.kroehl@cryptokri.ch>
Date: Mon, 25 May 2026 12:29:36 +0200
Subject: [PATCH] feat(trust-score): F2 cold-start score from public data for
 new agents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 trust scoring withholds a score for agents with fewer than three
endorsements, leaving brand-new agents indistinguishable from inactive
ones. This change derives a `cold_start_score` from public signals so
the onboarding response carries something honest instead of an
ambiguous null.

Sources (max contribution each):
- On-chain wallet history via Basescan (Base L2) ........ 20 pts
  tx_count / 10 (cap 8), age_days / 30 (cap 7), usdc_volume / 1000 (cap 5)
- GitHub account activity (when github_url available) ... 15 pts
- ERC-8004 registry presence (local outreach scan) ...... 10 pts
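Hard ceiling: 60.0 — a cold-start agent can never appear stronger than
an endorsed one. (The three sources above total at most 45 points, so
the ceiling only becomes binding if further sources are added.) Score is
`null` with basis `no_public_data` when no source returns data
(deliberate — we do not fabricate a default).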

Integration: `GET /skill/trust-score/{did}` returns the new
`cold_start_*` fields ONLY when `endorser_count == 0`. As soon as
real endorsements arrive the Phase 2 score takes over.

Cache: 24h TTL, stored on `agents.cold_start_*` columns to avoid
hammering Basescan/GitHub on every lookup.

Changes:
- app/migrations/007_cold_start_score.sql — 4 columns on `agents`
- app/cold_start.py — fetchers + pure scoring + 24h-cached orchestrator
- app/main.py — non-breaking addition to `/skill/trust-score/{did}`
- tests/test_cold_start.py — 10 pure-unit tests for the scoring formula

Out of scope (documented as future work):
- `github_url` is not yet a column on `agents`; the GitHub fetcher is
  wired but currently fed `None`. Activates once registration captures
  the field.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 app/cold_start.py                       | 278 ++++++++++++++++++++++++
 app/main.py                             |   9 +
 app/migrations/007_cold_start_score.sql |  18 ++
 tests/test_cold_start.py                | 126 +++++++++++
 4 files changed, 431 insertions(+)
 create mode 100644 app/cold_start.py
 create mode 100644 app/migrations/007_cold_start_score.sql
 create mode 100644 tests/test_cold_start.py

diff --git a/app/cold_start.py b/app/cold_start.py
new file mode 100644
index 0000000..3301c7f
--- /dev/null
+++ b/app/cold_start.py
@@ -0,0 +1,278 @@
+"""F2 Cold-Start Score — derive a startup score for agents without endorsements.
+
+Phase 2 trust scoring withholds a score when an agent has fewer than three
+endorsements. That makes brand-new agents indistinguishable from inactive
+ones. The cold-start score fills that gap by surfacing what *public*
+signals are available (on-chain wallet history, GitHub activity, ERC-8004
+registry presence) without inventing a number when none exist.
+
+Contract (per Whitepaper v4 follow-up "Onboarding Q3 2026"):
+
+- Only applies when `endorser_count == 0`. Once endorsements arrive, the
+  Phase 2 score replaces this estimate.
+- A NULL score with basis `"no_public_data"` is honest — never a fabricated
+  default.
+- Cap at 60.0 so a cold-start agent can never appear stronger than an
+  endorsed one.
+- Cached in `agents.cold_start_*` for 24h to avoid hammering Basescan and
+  GitHub on every trust-score lookup.
+
+This module is HTTP-tolerant: any fetcher that fails returns `None` and the
+score falls back to the remaining sources.
+"""
+from __future__ import annotations
+
+import datetime
+import json
+import logging
+import os
+from typing import Optional
+from urllib.parse import quote
+from urllib.request import Request, urlopen
+
+import asyncpg
+
+log = logging.getLogger("moltrust.cold_start")
+
+CACHE_TTL_HOURS = 24
+COLD_START_CAP = 60.0
+HTTP_TIMEOUT_SECONDS = 8
+
+BASESCAN_API = "https://api.basescan.org/api"
+GITHUB_API = "https://api.github.com"
+
+
+# ---------------------------------------------------------------------------
+# External fetchers (HTTP) — kept tiny, return None on any failure
+# ---------------------------------------------------------------------------
+
+def _http_get_json(url: str, headers: Optional[dict] = None) -> Optional[dict]:
+    try:  # best-effort GET: parsed JSON body on success, None (plus a warning) on any failure
+        req = Request(url, headers={"User-Agent": "MolTrust-ColdStart/1.0", **(headers or {})})
+        with urlopen(req, timeout=HTTP_TIMEOUT_SECONDS) as r:  # noqa: S310 — URL is constructed from API constants
+            return json.loads(r.read())
+    except Exception as e:  # the query string is kept out of the log because Basescan URLs carry apikey= in it
+        log.warning("cold-start http fetch failed for %s: %s", url.split("?", 1)[0], e)
+        return None
+
+
+def fetch_basescan_wallet(wallet: str) -> Optional[dict]:
+    """Return {tx_count, age_days, usdc_volume} from Basescan, or None on failure.
+
+    `usdc_volume` is the summed value, in whole USDC, of the wallet's transfers on
+    the Base USDC contract. None is returned when BASESCAN_API_KEY or `wallet`
+    is empty, or when the txlist call fails or reports a status other than "1".
+    """
+    api_key = os.environ.get("BASESCAN_API_KEY", "")
+    if not api_key or not wallet:
+        return None
+
+    # 1) Tx list, oldest first (sort=asc) so txs[0] is the wallet's first transaction
+    tx_url = (
+        f"{BASESCAN_API}?module=account&action=txlist"
+        f"&address={quote(wallet)}&startblock=0&endblock=99999999"
+        f"&sort=asc&apikey={quote(api_key)}"
+    )
+    tx_data = _http_get_json(tx_url)
+    if not tx_data or tx_data.get("status") != "1":
+        return None
+    txs = tx_data.get("result", []) or []
+    tx_count = len(txs)
+    if tx_count == 0:
+        return {"tx_count": 0, "age_days": 0, "usdc_volume": 0.0}
+
+    try:
+        first_ts = int(txs[0].get("timeStamp", 0))
+    except (TypeError, ValueError):
+        first_ts = 0
+    # Aware "now": a naive utcnow().timestamp() is interpreted as local time and skews the age.
+    age_days = max(0, int((datetime.datetime.now(datetime.timezone.utc).timestamp() - first_ts) / 86400)) if first_ts else 0
+    # 2) USDC volume (Base USDC contract). Cheap approximation: sum of transfer values
+    usdc_contract = "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
+    erc_url = (
+        f"{BASESCAN_API}?module=account&action=tokentx"
+        f"&contractaddress={usdc_contract}"
+        f"&address={quote(wallet)}&sort=asc&apikey={quote(api_key)}"
+    )
+    erc_data = _http_get_json(erc_url)
+    usdc_volume = 0.0
+    if erc_data and erc_data.get("status") == "1":
+        for t in erc_data.get("result", []) or []:
+            try:
+                # USDC has 6 decimals on Base
+                usdc_volume += float(t.get("value", 0)) / 1e6
+            except (TypeError, ValueError):
+                pass  # skip rows whose value is not numeric
+
+    return {"tx_count": tx_count, "age_days": age_days, "usdc_volume": usdc_volume}
+
+
+def fetch_github_user(username: str) -> Optional[dict]:
+    """Return {public_repos, account_age_days, recent_commits} from GitHub, or None.
+
+    `recent_commits` counts commits in PushEvents of the public events feed from the last 90 days.
+    """
+    if not username:
+        return None
+    token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN", "")
+    headers = {"Accept": "application/vnd.github+json"}
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    user = _http_get_json(f"{GITHUB_API}/users/{quote(username)}", headers=headers)
+    if not user or "login" not in user:
+        return None
+
+    try:
+        created_at = datetime.datetime.fromisoformat(user["created_at"].replace("Z", "+00:00"))
+        age_days = max(0, (datetime.datetime.now(datetime.timezone.utc) - created_at).days)
+    except (KeyError, ValueError, AttributeError, TypeError):  # missing, null, malformed or tz-less timestamp
+        age_days = 0
+
+    # Recent commits: list push events from the last 90 days
+    events = _http_get_json(f"{GITHUB_API}/users/{quote(username)}/events/public", headers=headers) or []
+    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=90)
+    recent_commits = 0
+    if isinstance(events, list):
+        for ev in events:
+            if not isinstance(ev, dict) or ev.get("type") != "PushEvent":
+                continue
+            try:
+                ev_ts = datetime.datetime.fromisoformat(ev.get("created_at", "").replace("Z", "+00:00"))
+            except (AttributeError, ValueError):  # null or unparsable created_at: skip the event
+                continue
+            if ev_ts >= cutoff:
+                payload = ev.get("payload") or {}
+                recent_commits += len(payload.get("commits") or [])
+
+    return {
+        "public_repos": int(user.get("public_repos", 0) or 0),
+        "account_age_days": age_days,
+        "recent_commits": recent_commits,
+    }
+
+
+async def check_erc8004_match(wallet: str, conn: asyncpg.Connection) -> bool:
+    """Tell whether `wallet` matches a row in erc8004_outreach, comparing addresses case-insensitively."""
+    if not wallet:
+        return False
+    lookup = "SELECT 1 FROM erc8004_outreach WHERE LOWER(wallet_address) = LOWER($1) LIMIT 1"
+    try:
+        found = await conn.fetchrow(lookup, wallet)
+    except asyncpg.UndefinedTableError:
+        # The table does not exist: report "no match" instead of raising.
+        return False
+    else:
+        return found is not None
+
+
+# ---------------------------------------------------------------------------
+# Pure scoring — no IO, easy to test
+# ---------------------------------------------------------------------------
+
+def calculate_cold_start_score(
+    wallet_data: Optional[dict],
+    github_data: Optional[dict],
+    erc8004_match: bool,
+) -> tuple[Optional[float], str, str]:
+    """Return (score, basis, confidence) computed from whichever public signals are present.
+
+    Sources add up to 20 (on-chain), 15 (GitHub) and 10 (ERC-8004) points; the sum is capped at
+    COLD_START_CAP. With no contributing source the result is (None, "no_public_data", "none").
+    """
+    score = 0.0
+    basis: list[str] = []
+
+    if wallet_data and wallet_data.get("tx_count", 0) > 0:
+        tx_score = min(wallet_data.get("tx_count", 0) / 10, 8)
+        age_score = min(wallet_data.get("age_days", 0) / 30, 7)
+        vol_score = min(wallet_data.get("usdc_volume", 0) / 1000, 5)
+        score += tx_score + age_score + vol_score
+        basis.append("onchain")
+
+    if github_data and github_data.get("public_repos", 0) > 0:
+        repo_score = min(github_data.get("public_repos", 0) / 3, 5)
+        age_score = min(github_data.get("account_age_days", 0) / 60, 5)
+        commit_score = min(github_data.get("recent_commits", 0) / 10, 5)
+        score += repo_score + age_score + commit_score
+        basis.append("github")
+
+    if erc8004_match:
+        score += 10
+        basis.append("erc8004")
+
+    if not basis:
+        return None, "no_public_data", "none"
+
+    confidence = "high" if len(basis) >= 2 else "medium"  # basis is non-empty here, so one source means "medium"
+    return min(round(score, 1), COLD_START_CAP), "+".join(basis), confidence
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator + 24h cache
+# ---------------------------------------------------------------------------
+
+async def get_cold_start_score(did: str, conn: asyncpg.Connection) -> dict:
+    """Return the cold-start payload for an agent, computing or returning a cached value.
+
+    Result shape:
+        {
+            "cold_start_score": float | None,
+            "cold_start_basis": str,
+            "cold_start_confidence": "none"|"low"|"medium"|"high",
+            "cold_start_computed_at": iso str | None,
+            "cold_start_note": str,
+        }
+    """
+    row = await conn.fetchrow(
+        "SELECT wallet_address, cold_start_score, cold_start_basis, "
+        "cold_start_confidence, cold_start_computed_at "
+        "FROM agents WHERE did = $1",
+        did,
+    )
+    if row is None:
+        return _payload(None, "no_public_data", "none", None)
+
+    computed_at = row["cold_start_computed_at"]
+    if computed_at:
+        age = datetime.datetime.utcnow() - (  # naive-UTC arithmetic; aware values are converted to UTC first
+            computed_at.astimezone(datetime.timezone.utc).replace(tzinfo=None) if computed_at.tzinfo else computed_at
+        )
+        if age < datetime.timedelta(hours=CACHE_TTL_HOURS):
+            return _payload(
+                row["cold_start_score"],
+                row["cold_start_basis"] or "no_public_data",
+                row["cold_start_confidence"] or "none",
+                computed_at,
+            )
+
+    wallet = row["wallet_address"]
+    import asyncio  # function-scope import: asyncio is not among this module's top-level imports
+    # fetch_basescan_wallet does blocking urllib I/O; run it in the default executor so the event loop stays free.
+    wallet_data = await asyncio.get_running_loop().run_in_executor(None, fetch_basescan_wallet, wallet) if wallet else None
+    erc8004_match = await check_erc8004_match(wallet, conn) if wallet else False
+    # GitHub username is not yet a field on agents, so None is passed explicitly rather than guessed.
+    github_data = None
+
+    score, basis, confidence = calculate_cold_start_score(wallet_data, github_data, erc8004_match)
+    now = datetime.datetime.utcnow()
+    await conn.execute(
+        "UPDATE agents SET cold_start_score = $1, cold_start_basis = $2, "
+        "cold_start_confidence = $3, cold_start_computed_at = $4 WHERE did = $5",
+        score, basis, confidence, now, did,
+    )
+    return _payload(score, basis, confidence, now)
+
+
+def _payload(score: Optional[float], basis: str, confidence: str, computed_at) -> dict:
+    """Build the `cold_start_*` response dict; the note depends on whether a score exists."""
+    stamp = computed_at.isoformat() if computed_at else None
+    note = (
+        "Score based on public data. Will be replaced by behavioral history."
+        if score is not None
+        else "No public history found. Score will build from first interactions."
+    )
+    return {
+        "cold_start_score": score, "cold_start_basis": basis, "cold_start_confidence": confidence,
+        "cold_start_computed_at": stamp, "cold_start_note": note,
+    }
diff --git a/app/main.py b/app/main.py
index 8fd129b..87ae58b 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1386,6 +1386,15 @@ async def get_trust_score(did: str):
             }
             # CAEP: alias valid_until = cache_valid_until (keep both, non-breaking)
             score_response["valid_until"] = score_response["cache_valid_until"]
+            # F2 cold-start: surface a public-data-derived starting score for
+            # agents that have not yet collected any endorsements. Honest
+            # nulls when no public data exists — see app/cold_start.py.
+            if result.get("endorser_count", 0) == 0:
+                from app.cold_start import get_cold_start_score
+                try:
+                    score_response.update(await get_cold_start_score(did, conn))
+                except Exception as cs_err:
+                    logger.warning("cold-start failed for %s: %s", did, cs_err)
             # CAEP: sign deterministic minimal payload with registry key
             if score_response["computed_at"] and score_response["valid_until"]:
                 from app.signature import sign_payload, build_score_signing_payload
diff --git a/app/migrations/007_cold_start_score.sql b/app/migrations/007_cold_start_score.sql
new file mode 100644
index 0000000..efef731
--- /dev/null
+++ b/app/migrations/007_cold_start_score.sql
@@ -0,0 +1,18 @@
+-- F2 Cold-Start Score (Whitepaper v4, follow-up "Onboarding Q3 2026")
+--
+-- Adds four columns to `agents` so we can cache a public-data-derived score
+-- for agents that have not yet accumulated behavioral history (i.e. zero
+-- endorsements). Cache TTL is 24h (enforced in app/cold_start.py).
+--
+-- Score sources:
+--   * On-chain wallet history (Base L2 via Basescan)  — up to 20 points
+--   * GitHub account activity (when available)        — up to 15 points
+--   * ERC-8004 registry presence                       — up to 10 points
+--
+-- A NULL `cold_start_score` with basis = 'no_public_data' means the agent
+-- has no externally-derivable score — we deliberately do not invent a value.
+
+ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_score      FLOAT;
+ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_basis      VARCHAR(100);
+ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_confidence VARCHAR(10);
+ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_computed_at TIMESTAMP;
diff --git a/tests/test_cold_start.py b/tests/test_cold_start.py
new file mode 100644
index 0000000..d11cf21
--- /dev/null
+++ b/tests/test_cold_start.py
@@ -0,0 +1,126 @@
+"""F2 Cold-Start Score — pure unit tests for calculate_cold_start_score.
+
+The orchestrator (`get_cold_start_score`) has a database + HTTP surface that
+the in-repo CI does not stand up; the formula itself is pure and exhaustively
+covered here. Edge cases included: empty data, partial sources, source caps,
+and the hard 60-point ceiling.
+"""
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+from app.cold_start import calculate_cold_start_score, COLD_START_CAP  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# No data → honest null
+# ---------------------------------------------------------------------------
+
+def test_no_data_returns_none():
+    score, basis, confidence = calculate_cold_start_score(None, None, False)
+    assert score is None
+    assert basis == "no_public_data"
+    assert confidence == "none"
+
+
+def test_empty_wallet_data_returns_none():
+    score, basis, confidence = calculate_cold_start_score({"tx_count": 0}, None, False)
+    assert score is None
+    assert basis == "no_public_data"
+
+
+# ---------------------------------------------------------------------------
+# Single source — confidence "medium"
+# ---------------------------------------------------------------------------
+
+def test_onchain_only_medium_confidence():
+    score, basis, confidence = calculate_cold_start_score(
+        {"tx_count": 50, "age_days": 100, "usdc_volume": 2000}, None, False
+    )
+    # tx_score=min(50/10,8)=5, age_score=min(100/30,7)=3.33, vol_score=min(2000/1000,5)=2
+    # total ≈ 10.33
+    assert score is not None
+    assert 9.5 <= score <= 11.0
+    assert basis == "onchain"
+    assert confidence == "medium"
+
+
+def test_github_only_medium_confidence():
+    score, basis, confidence = calculate_cold_start_score(
+        None,
+        {"public_repos": 9, "account_age_days": 180, "recent_commits": 25},
+        False,
+    )
+    # repo=min(9/3,5)=3, age=min(180/60,5)=3, commit=min(25/10,5)=2.5 = 8.5
+    assert 8.0 <= score <= 9.0
+    assert basis == "github"
+    assert confidence == "medium"
+
+
+def test_erc8004_only_medium_confidence():
+    score, basis, confidence = calculate_cold_start_score(None, None, True)
+    assert score == 10.0
+    assert basis == "erc8004"
+    assert confidence == "medium"
+
+
+# ---------------------------------------------------------------------------
+# Multiple sources — confidence "high"
+# ---------------------------------------------------------------------------
+
+def test_onchain_plus_github_high_confidence():
+    score, basis, confidence = calculate_cold_start_score(
+        {"tx_count": 50, "age_days": 100, "usdc_volume": 2000},
+        {"public_repos": 9, "account_age_days": 180, "recent_commits": 25},
+        False,
+    )
+    assert score is not None
+    # ~10.33 + ~8.5 ≈ 18.8
+    assert 17.5 <= score <= 19.5
+    assert basis == "onchain+github"
+    assert confidence == "high"
+
+
+def test_all_three_sources_high_confidence():
+    score, basis, confidence = calculate_cold_start_score(
+        {"tx_count": 50, "age_days": 100, "usdc_volume": 2000},
+        {"public_repos": 9, "account_age_days": 180, "recent_commits": 25},
+        True,
+    )
+    assert score is not None
+    # ~18.8 + 10 (erc8004) ≈ 28.8
+    assert 27.5 <= score <= 30.0
+    assert basis == "onchain+github+erc8004"
+    assert confidence == "high"
+
+
+# ---------------------------------------------------------------------------
+# Caps — per-source AND hard 60.0 ceiling
+# ---------------------------------------------------------------------------
+
+def test_onchain_source_caps_at_20():
+    score, _, _ = calculate_cold_start_score(
+        {"tx_count": 100_000, "age_days": 10_000, "usdc_volume": 10_000_000}, None, False
+    )
+    # cap: 8 + 7 + 5 = 20
+    assert score == 20.0
+
+
+def test_github_source_caps_at_15():
+    score, _, _ = calculate_cold_start_score(
+        None, {"public_repos": 1000, "account_age_days": 10_000, "recent_commits": 1000}, False,
+    )
+    assert score == 15.0
+
+
+def test_overall_score_cannot_exceed_60():
+    score, _, _ = calculate_cold_start_score(
+        {"tx_count": 100_000, "age_days": 10_000, "usdc_volume": 10_000_000},
+        {"public_repos": 1000, "account_age_days": 10_000, "recent_commits": 1000},
+        True,
+    )
+    # Saturated inputs give 20 + 15 + 10 = 45, which is below COLD_START_CAP, so this
+    # pins the highest attainable score and checks that it stays within the ceiling.
+    assert score <= COLD_START_CAP
+    assert score == 45.0