From d2ccdf47f49233a78c2a91cf8d667f46a34f36f7 Mon Sep 17 00:00:00 2001 From: Lars Kroehl Date: Mon, 25 May 2026 12:29:36 +0200 Subject: [PATCH] feat(trust-score): F2 cold-start score from public data for new agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 trust scoring withholds a score for agents with fewer than three endorsements, leaving brand-new agents indistinguishable from inactive ones. This change derives a `cold_start_score` from public signals so the onboarding response carries something honest instead of an ambiguous null. Sources (max contribution each): - On-chain wallet history via Basescan (Base L2) ........ 20 pts tx_count / 10 (cap 8), age_days / 30 (cap 7), usdc_volume / 1000 (cap 5) - GitHub account activity (when github_url available) ... 15 pts - ERC-8004 registry presence (local outreach scan) ...... 10 pts Hard ceiling: 60.0 — a cold-start agent can never appear stronger than an endorsed one. Score is `null` with basis `no_public_data` when no source returns data (deliberate — we do not fabricate a default). Integration: `GET /skill/trust-score/{did}` returns the new `cold_start_*` fields ONLY when `endorser_count == 0`. As soon as real endorsements arrive the Phase 2 score takes over. Cache: 24h TTL, stored on `agents.cold_start_*` columns to avoid hammering Basescan/GitHub on every lookup. Changes: - app/migrations/007_cold_start_score.sql — 4 columns on `agents` - app/cold_start.py — fetchers + pure scoring + 24h-cached orchestrator - app/main.py — non-breaking addition to `/skill/trust-score/{did}` - tests/test_cold_start.py — 10 pure-unit tests for the scoring formula Out of scope (documented as future work): - `github_url` is not yet a column on `agents`; the GitHub fetcher is wired but currently fed `None`. Activates once registration captures the field. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- app/cold_start.py | 278 ++++++++++++++++++++++++ app/main.py | 9 + app/migrations/007_cold_start_score.sql | 18 ++ tests/test_cold_start.py | 126 +++++++++++ 4 files changed, 431 insertions(+) create mode 100644 app/cold_start.py create mode 100644 app/migrations/007_cold_start_score.sql create mode 100644 tests/test_cold_start.py diff --git a/app/cold_start.py b/app/cold_start.py new file mode 100644 index 0000000..3301c7f --- /dev/null +++ b/app/cold_start.py @@ -0,0 +1,278 @@ +"""F2 Cold-Start Score — derive a startup score for agents without endorsements. + +Phase 2 trust scoring withholds a score when an agent has fewer than three +endorsements. That makes brand-new agents indistinguishable from inactive +ones. The cold-start score fills that gap by surfacing what *public* +signals are available (on-chain wallet history, GitHub activity, ERC-8004 +registry presence) without inventing a number when none exist. + +Contract (per Whitepaper v4 follow-up "Onboarding Q3 2026"): + +- Only applies when `endorser_count == 0`. Once endorsements arrive, the + Phase 2 score replaces this estimate. +- A NULL score with basis `"no_public_data"` is honest — never a fabricated + default. +- Cap at 60.0 so a cold-start agent can never appear stronger than an + endorsed one. +- Cached in `agents.cold_start_*` for 24h to avoid hammering Basescan and + GitHub on every trust-score lookup. + +This module is HTTP-tolerant: any fetcher that fails returns `None` and the +score falls back to the remaining sources. 
+"""
+from __future__ import annotations
+
+import datetime
+import json
+import logging
+import os
+from typing import Optional
+from urllib.parse import quote
+from urllib.request import Request, urlopen
+
+import asyncpg
+
+log = logging.getLogger("moltrust.cold_start")
+
+CACHE_TTL_HOURS = 24
+COLD_START_CAP = 60.0
+HTTP_TIMEOUT_SECONDS = 8
+
+BASESCAN_API = "https://api.basescan.org/api"
+GITHUB_API = "https://api.github.com"
+
+
+# ---------------------------------------------------------------------------
+# External fetchers (HTTP) — kept tiny, return None on any failure
+# ---------------------------------------------------------------------------
+
+def _http_get_json(url: str, headers: Optional[dict] = None) -> Optional[dict]:
+    try:
+        req = Request(url, headers={"User-Agent": "MolTrust-ColdStart/1.0", **(headers or {})})
+        with urlopen(req, timeout=HTTP_TIMEOUT_SECONDS) as r:  # noqa: S310 — URL is constructed from API constants
+            return json.loads(r.read())
+    except Exception as e:
+        log.warning("cold-start http fetch failed for %s: %s", url.split("?", 1)[0], e)  # query string holds the API key
+        return None
+
+
+def fetch_basescan_wallet(wallet: str) -> Optional[dict]:
+    """Return {tx_count, age_days, usdc_volume} from Basescan, or None on failure.
+
+    USDC volume is the plain sum of every USDC transfer amount Basescan's ERC-20
+    endpoint lists for the wallet, without distinguishing direction. The formula
+    saturates at 5000 USDC (usdc_volume / 1000, cap 5), so a rough total suffices.
+    """
+    api_key = os.environ.get("BASESCAN_API_KEY", "")
+    if not api_key or not wallet:
+        return None
+
+    # 1) Tx list, oldest first (sort=asc) so txs[0] dates the wallet — up to 10k
+    tx_url = (
+        f"{BASESCAN_API}?module=account&action=txlist"
+        f"&address={quote(wallet)}&startblock=0&endblock=99999999"
+        f"&sort=asc&apikey={quote(api_key)}"
+    )
+    tx_data = _http_get_json(tx_url)
+    if not tx_data or tx_data.get("status") != "1":
+        return None
+    txs = tx_data.get("result", []) or []
+    tx_count = len(txs)
+    if tx_count == 0:
+        return {"tx_count": 0, "age_days": 0, "usdc_volume": 0.0}
+
+    try:
+        first_ts = int(txs[0].get("timeStamp", 0))
+    except (TypeError, ValueError):
+        first_ts = 0
+    age_days = max(0, int((datetime.datetime.now(datetime.timezone.utc).timestamp() - first_ts) / 86400)) if first_ts else 0
+
+    # 2) USDC volume (Base USDC contract). Cheap approximation: sum of all transfers
+    usdc_contract = "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913"
+    erc_url = (
+        f"{BASESCAN_API}?module=account&action=tokentx"
+        f"&contractaddress={usdc_contract}"
+        f"&address={quote(wallet)}&sort=asc&apikey={quote(api_key)}"
+    )
+    erc_data = _http_get_json(erc_url)
+    usdc_volume = 0.0
+    if erc_data and erc_data.get("status") == "1":
+        for t in erc_data.get("result", []) or []:
+            try:
+                # USDC has 6 decimals on Base
+                usdc_volume += float(t.get("value", 0)) / 1e6
+            except (TypeError, ValueError):
+                pass
+
+    return {"tx_count": tx_count, "age_days": age_days, "usdc_volume": usdc_volume}
+
+
+def fetch_github_user(username: str) -> Optional[dict]:
+    """Return {public_repos, account_age_days, recent_commits} from GitHub, or None.
+
+    `recent_commits` uses the public events feed (last 90 days, push events only).
+    """
+    if not username:
+        return None
+    token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN", "")
+    headers = {"Accept": "application/vnd.github+json"}
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    user = _http_get_json(f"{GITHUB_API}/users/{quote(username)}", headers=headers)
+    if not user or "login" not in user:
+        return None
+
+    try:
+        created_at = datetime.datetime.fromisoformat(user["created_at"].replace("Z", "+00:00"))
+        age_days = max(0, (datetime.datetime.now(datetime.timezone.utc) - created_at).days)
+    except (AttributeError, KeyError, TypeError, ValueError):  # null / malformed / naive created_at
+        age_days = 0
+
+    # Recent commits: list push events from the last 90 days
+    events = _http_get_json(f"{GITHUB_API}/users/{quote(username)}/events/public", headers=headers) or []
+    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=90)
+    recent_commits = 0
+    if isinstance(events, list):
+        for ev in events:
+            if not isinstance(ev, dict) or ev.get("type") != "PushEvent":
+                continue
+            try:
+                ev_ts = datetime.datetime.fromisoformat((ev.get("created_at") or "").replace("Z", "+00:00"))
+            except (AttributeError, TypeError, ValueError):  # skip events with an unusable timestamp
+                continue
+            if ev_ts >= cutoff:
+                payload = ev.get("payload") or {}
+                recent_commits += len(payload.get("commits") or [])
+
+    return {
+        "public_repos": int(user.get("public_repos", 0) or 0),
+        "account_age_days": age_days,
+        "recent_commits": recent_commits,
+    }
+
+
+async def check_erc8004_match(wallet: str, conn: asyncpg.Connection) -> bool:
+    """True if this wallet shows up in our local erc8004_outreach scan."""
+    if not wallet:
+        return False
+    try:
+        row = await conn.fetchrow(
+            "SELECT 1 FROM erc8004_outreach WHERE LOWER(wallet_address) = LOWER($1) LIMIT 1",
+            wallet,
+        )
+        return row is not None
+    except asyncpg.UndefinedTableError:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Pure scoring — no IO, easy to test
+# ---------------------------------------------------------------------------
+
+def calculate_cold_start_score(
+    wallet_data: Optional[dict],
+    github_data: Optional[dict],
+    erc8004_match: bool,
+) -> tuple[Optional[float], str, str]:
+    """Score in [0, 60], basis tag ("+"-joined source names), and confidence label.
+
+    Returns (None, "no_public_data", "none") when nothing is available — by
+    design, we never fabricate a starting score from thin air.
+    """
+    score = 0.0
+    basis: list[str] = []
+
+    if wallet_data and wallet_data.get("tx_count", 0) > 0:
+        tx_score = min(wallet_data.get("tx_count", 0) / 10, 8)
+        age_score = min(wallet_data.get("age_days", 0) / 30, 7)
+        vol_score = min(wallet_data.get("usdc_volume", 0) / 1000, 5)
+        score += tx_score + age_score + vol_score
+        basis.append("onchain")
+
+    if github_data and github_data.get("public_repos", 0) > 0:
+        repo_score = min(github_data.get("public_repos", 0) / 3, 5)
+        age_score = min(github_data.get("account_age_days", 0) / 60, 5)
+        commit_score = min(github_data.get("recent_commits", 0) / 10, 5)
+        score += repo_score + age_score + commit_score
+        basis.append("github")
+
+    if erc8004_match:
+        score += 10
+        basis.append("erc8004")
+
+    if not basis:
+        return None, "no_public_data", "none"
+
+    confidence = "high" if len(basis) >= 2 else "medium"  # basis is non-empty here, so "low" cannot occur
+    return min(round(score, 1), COLD_START_CAP), "+".join(basis), confidence
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator + 24h cache
+# ---------------------------------------------------------------------------
+
+async def get_cold_start_score(did: str, conn: asyncpg.Connection) -> dict:
+    """Return the cold-start payload for an agent, computing or returning a cached value.
+
+    Result shape:
+    {
+        "cold_start_score": float | None,
+        "cold_start_basis": str,
+        "cold_start_confidence": "none"|"low"|"medium"|"high",
+        "cold_start_computed_at": iso str | None,
+        "cold_start_note": str,
+    }
+    """
+    row = await conn.fetchrow(
+        "SELECT wallet_address, cold_start_score, cold_start_basis, "
+        "cold_start_confidence, cold_start_computed_at "
+        "FROM agents WHERE did = $1",
+        did,
+    )
+    if row is None:
+        return _payload(None, "no_public_data", "none", None)
+
+    computed_at = row["cold_start_computed_at"]
+    if computed_at:
+        age = datetime.datetime.now(datetime.timezone.utc) - (  # naive stored values are UTC
+            computed_at if computed_at.tzinfo else computed_at.replace(tzinfo=datetime.timezone.utc)
+        )
+        if age < datetime.timedelta(hours=CACHE_TTL_HOURS):
+            return _payload(
+                row["cold_start_score"],
+                row["cold_start_basis"] or "no_public_data",
+                row["cold_start_confidence"] or "none",
+                computed_at,
+            )
+
+    wallet = row["wallet_address"]
+    # urllib blocks (two requests, HTTP_TIMEOUT_SECONDS each): run it off the event loop.
+    import asyncio  # function-scope so the module import block stays untouched
+    wallet_data = (await asyncio.get_running_loop().run_in_executor(None, fetch_basescan_wallet, wallet)) if wallet else None
+    erc8004_match = await check_erc8004_match(wallet, conn) if wallet else False
+    # agents has no GitHub field yet: pass None (no guessing) until registration captures one.
+    github_data = None
+
+    score, basis, confidence = calculate_cold_start_score(wallet_data, github_data, erc8004_match)
+    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)  # naive UTC, same value utcnow() gave
+    await conn.execute(
+        "UPDATE agents SET cold_start_score = $1, cold_start_basis = $2, "
+        "cold_start_confidence = $3, cold_start_computed_at = $4 WHERE did = $5",
+        score, basis, confidence, now, did,
+    )
+    return _payload(score, basis, confidence, now)
+
+
+def _payload(score: Optional[float], basis: str, confidence: str, computed_at) -> dict:
+    if score is None:
+        note = "No public history found. Score will build from first interactions."
+    else:
+        note = "Score based on public data. Will be replaced by behavioral history."
+ return { + "cold_start_score": score, + "cold_start_basis": basis, + "cold_start_confidence": confidence, + "cold_start_computed_at": computed_at.isoformat() if computed_at else None, + "cold_start_note": note, + } diff --git a/app/main.py b/app/main.py index 8fd129b..87ae58b 100644 --- a/app/main.py +++ b/app/main.py @@ -1386,6 +1386,15 @@ async def get_trust_score(did: str): } # CAEP: alias valid_until = cache_valid_until (keep both, non-breaking) score_response["valid_until"] = score_response["cache_valid_until"] + # F2 cold-start: surface a public-data-derived starting score for + # agents that have not yet collected any endorsements. Honest + # nulls when no public data exists — see app/cold_start.py. + if result.get("endorser_count", 0) == 0: + from app.cold_start import get_cold_start_score + try: + score_response.update(await get_cold_start_score(did, conn)) + except Exception as cs_err: + logger.warning("cold-start failed for %s: %s", did, cs_err) # CAEP: sign deterministic minimal payload with registry key if score_response["computed_at"] and score_response["valid_until"]: from app.signature import sign_payload, build_score_signing_payload diff --git a/app/migrations/007_cold_start_score.sql b/app/migrations/007_cold_start_score.sql new file mode 100644 index 0000000..efef731 --- /dev/null +++ b/app/migrations/007_cold_start_score.sql @@ -0,0 +1,18 @@ +-- F2 Cold-Start Score (Whitepaper v4, follow-up "Onboarding Q3 2026") +-- +-- Adds four columns to `agents` so we can cache a public-data-derived score +-- for agents that have not yet accumulated behavioral history (i.e. zero +-- endorsements). Cache TTL is 24h (enforced in app/cold_start.py). 
+-- +-- Score sources: +-- * On-chain wallet history (Base L2 via Basescan) — up to 20 points +-- * GitHub account activity (when available) — up to 15 points +-- * ERC-8004 registry presence — up to 10 points +-- +-- A NULL `cold_start_score` with basis = 'no_public_data' means the agent +-- has no externally-derivable score — we deliberately do not invent a value. + +ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_score FLOAT; +ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_basis VARCHAR(100); +ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_confidence VARCHAR(10); +ALTER TABLE agents ADD COLUMN IF NOT EXISTS cold_start_computed_at TIMESTAMP; diff --git a/tests/test_cold_start.py b/tests/test_cold_start.py new file mode 100644 index 0000000..d11cf21 --- /dev/null +++ b/tests/test_cold_start.py @@ -0,0 +1,126 @@ +"""F2 Cold-Start Score — pure unit tests for calculate_cold_start_score. + +The orchestrator (`get_cold_start_score`) has a database + HTTP surface that +the in-repo CI does not stand up; the formula itself is pure and exhaustively +covered here. Edge cases included: empty data, partial sources, source caps, +and the hard 60-point ceiling. 
+""" +import os +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +from app.cold_start import calculate_cold_start_score, COLD_START_CAP # noqa: E402 + + +# --------------------------------------------------------------------------- +# No data → honest null +# --------------------------------------------------------------------------- + +def test_no_data_returns_none(): + score, basis, confidence = calculate_cold_start_score(None, None, False) + assert score is None + assert basis == "no_public_data" + assert confidence == "none" + + +def test_empty_wallet_data_returns_none(): + score, basis, confidence = calculate_cold_start_score({"tx_count": 0}, None, False) + assert score is None + assert basis == "no_public_data" + + +# --------------------------------------------------------------------------- +# Single source — confidence "medium" +# --------------------------------------------------------------------------- + +def test_onchain_only_medium_confidence(): + score, basis, confidence = calculate_cold_start_score( + {"tx_count": 50, "age_days": 100, "usdc_volume": 2000}, None, False + ) + # tx_score=min(50/10,8)=5, age_score=min(100/30,7)=3.33, vol_score=min(2000/1000,5)=2 + # total ≈ 10.33 + assert score is not None + assert 9.5 <= score <= 11.0 + assert basis == "onchain" + assert confidence == "medium" + + +def test_github_only_medium_confidence(): + score, basis, confidence = calculate_cold_start_score( + None, + {"public_repos": 9, "account_age_days": 180, "recent_commits": 25}, + False, + ) + # repo=min(9/3,5)=3, age=min(180/60,5)=3, commit=min(25/10,5)=2.5 = 8.5 + assert 8.0 <= score <= 9.0 + assert basis == "github" + assert confidence == "medium" + + +def test_erc8004_only_medium_confidence(): + score, basis, confidence = calculate_cold_start_score(None, None, True) + assert score == 10.0 + assert basis == "erc8004" + assert confidence == "medium" + + +# 
--------------------------------------------------------------------------- +# Multiple sources — confidence "high" +# --------------------------------------------------------------------------- + +def test_onchain_plus_github_high_confidence(): + score, basis, confidence = calculate_cold_start_score( + {"tx_count": 50, "age_days": 100, "usdc_volume": 2000}, + {"public_repos": 9, "account_age_days": 180, "recent_commits": 25}, + False, + ) + assert score is not None + # ~10.33 + ~8.5 ≈ 18.8 + assert 17.5 <= score <= 19.5 + assert basis == "onchain+github" + assert confidence == "high" + + +def test_all_three_sources_high_confidence(): + score, basis, confidence = calculate_cold_start_score( + {"tx_count": 50, "age_days": 100, "usdc_volume": 2000}, + {"public_repos": 9, "account_age_days": 180, "recent_commits": 25}, + True, + ) + assert score is not None + # ~18.8 + 10 (erc8004) ≈ 28.8 + assert 27.5 <= score <= 30.0 + assert basis == "onchain+github+erc8004" + assert confidence == "high" + + +# --------------------------------------------------------------------------- +# Caps — per-source AND hard 60.0 ceiling +# --------------------------------------------------------------------------- + +def test_onchain_source_caps_at_20(): + score, _, _ = calculate_cold_start_score( + {"tx_count": 100_000, "age_days": 10_000, "usdc_volume": 10_000_000}, None, False + ) + # cap: 8 + 7 + 5 = 20 + assert score == 20.0 + + +def test_github_source_caps_at_15(): + score, _, _ = calculate_cold_start_score( + None, {"public_repos": 1000, "account_age_days": 10_000, "recent_commits": 1000}, False, + ) + assert score == 15.0 + + +def test_overall_score_cannot_exceed_60(): + score, _, _ = calculate_cold_start_score( + {"tx_count": 100_000, "age_days": 10_000, "usdc_volume": 10_000_000}, + {"public_repos": 1000, "account_age_days": 10_000, "recent_commits": 1000}, + True, + ) + # 20 + 15 + 10 = 45 — well below the ceiling, but let's also confirm the + # constant exists and is honoured. 
+ assert score <= COLD_START_CAP + assert score == 45.0