From 08aec2145443109d21828b0ae1b230c8f3bffa4e Mon Sep 17 00:00:00 2001 From: frapercan Date: Fri, 8 May 2026 18:05:49 +0200 Subject: [PATCH] =?UTF-8?q?feat(web):=20benchmark=20heatmap=20small-multip?= =?UTF-8?q?les=20=E2=80=94=20visual=20ranking=20per=20(cat=20=C3=97=20aspe?= =?UTF-8?q?ct)=20cell?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the long flat matrix as the default view of /benchmark with a 3-aspects × N-categories grid of compact heatmap cards. Each cell ranks embeddings by Fmax with horizontal bars colored on a perceptual scale, the leader marked with a medal, and a slot reserved for bootstrap CI whiskers (rendered when persisted). The original full matrix table stays one click away behind a Heatmap | Table toggle so the export-friendly raw-numbers view isn't lost. New components/BenchmarkHeatmap.tsx - bestRowsByEmbedding: collapses the matrix endpoint's per-K rows to one bar per embedding (the cell's best across stages/Ks already in the active selection). - HSL gradient blue→violet by Fmax, bar width proportional. Color is supportive; the bar length is the primary signal for accessibility. - Aspect-tinted card header (MFO blue / BPO violet / CCO emerald) so the per-aspect column reads at a glance. - Hover tooltip exposes stage / K / Fmax. Future CI whiskers will render in the same row without changing the cell layout. apps/web/app/[locale]/benchmark/page.tsx - New viewMode state (default "heatmap"). - Toggle bar (role=tablist, aria-selected) rendered when there's data. - Existing leaderboards (global + in-selection) stay above the toggle unchanged — they're already the per-cell story. Behavior unchanged: - Filters (stage, K, evaluation_set), CSV export, leaderboards, full matrix table — all preserved. Toggle to "Table" for the prior view. CI: next build green; backend untouched. 
--- apps/web/app/[locale]/benchmark/page.tsx | 43 ++++- apps/web/components/BenchmarkHeatmap.tsx | 200 +++++++++++++++++++++++ 2 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 apps/web/components/BenchmarkHeatmap.tsx diff --git a/apps/web/app/[locale]/benchmark/page.tsx b/apps/web/app/[locale]/benchmark/page.tsx index b93c04d..95e0038 100644 --- a/apps/web/app/[locale]/benchmark/page.tsx +++ b/apps/web/app/[locale]/benchmark/page.tsx @@ -2,6 +2,7 @@ import { useEffect, useMemo, useState } from "react"; import Link from "next/link"; +import { BenchmarkHeatmap } from "@/components/BenchmarkHeatmap"; import { getBenchmarkEmbeddings, getBenchmarkMatrix, @@ -145,6 +146,7 @@ export default function BenchmarkPage() { const [stage, setStage] = useState(null); const [evalSetId, setEvalSetId] = useState("all"); const [selectedK, setSelectedK] = useState(null); + const [viewMode, setViewMode] = useState<"heatmap" | "table">("heatmap"); // Unfiltered catalog fetch — populates the full set of known stages and // eval sets, so selector chips don't disappear when a filtered query @@ -554,7 +556,39 @@ export default function BenchmarkPage() { )} - {/* Matrix table */} + {/* View toggle: Heatmap (default) | Table */} + {hasData && ( +
+
+ {(["heatmap", "table"] as const).map((mode) => { + const active = viewMode === mode; + return ( + + ); + })} +
+

+ {viewMode === "heatmap" + ? "Visual ranking per cell. Bars sorted by Fmax." + : "Full matrix with raw numbers. Useful for export."} +

+
+ )} + + {/* Matrix view */} {!hasData ? (

@@ -566,6 +600,13 @@ export default function BenchmarkPage() { this cell of the matrix.

+ ) : viewMode === "heatmap" ? ( + ) : (
diff --git a/apps/web/components/BenchmarkHeatmap.tsx b/apps/web/components/BenchmarkHeatmap.tsx new file mode 100644 index 0000000..bd2eef8 --- /dev/null +++ b/apps/web/components/BenchmarkHeatmap.tsx @@ -0,0 +1,200 @@ +"use client"; + +import { useMemo } from "react"; +import type { BenchmarkEmbedding, BenchmarkRow } from "@/lib/api"; + +/** + * Small-multiples heatmap for the benchmark matrix. + * + * Layout: a (categories × aspects) grid of compact cards. Inside each + * card, one horizontal bar per embedding, sorted by Fmax descending. + * Bar width is proportional to Fmax; color reads on a fixed gradient + * so the eye can scan rank quickly. The leader gets a subtle medal. + * + * The space marked CI is reserved — when bootstrap CIs are persisted + * we'll render a `±` whisker without changing the cell layout. + */ +export type BenchmarkHeatmapProps = { + rows: BenchmarkRow[]; + embeddings: BenchmarkEmbedding[]; + categories: string[]; + aspects: string[]; + /** Optional: hide rows whose embedding isn't in this set. */ + embeddingFilter?: Set | null; +}; + +const ASPECT_TONE: Record = { + MFO: { ring: "ring-blue-100", bg: "bg-blue-50/60", text: "text-blue-700" }, + BPO: { ring: "ring-violet-100", bg: "bg-violet-50/60", text: "text-violet-700" }, + CCO: { ring: "ring-emerald-100", bg: "bg-emerald-50/60", text: "text-emerald-700" }, +}; + +const ASPECT_LABELS: Record = { + MFO: "Molecular Function", + BPO: "Biological Process", + CCO: "Cellular Component", +}; + +const CATEGORY_LABELS: Record = { + NK: "No Knowledge", + LK: "Limited Knowledge", + PK: "Partial Knowledge", +}; + +/** Pick the best Fmax row per embedding inside a single cell. The matrix + * endpoint already dedupes per (eid, esid, st, k, cat, asp) — collapse + * further to one row per embedding for the visualization. 
*/ +function bestRowsByEmbedding(rows: BenchmarkRow[]): BenchmarkRow[] { + const best = new Map(); + for (const r of rows) { + const cur = best.get(r.embedding_config_id); + if (cur == null || r.fmax > cur.fmax) best.set(r.embedding_config_id, r); + } + return Array.from(best.values()).sort((a, b) => b.fmax - a.fmax); +} + +/** Linear interpolation between two HSL colors. Returns a CSS color. */ +function fmaxToColor(fmax: number): string { + // 0 → cool blue, 1 → deep violet (perceptually rising) + const t = Math.max(0, Math.min(1, fmax)); + const hue = 220 + 50 * t; // 220 (blue) → 270 (violet) + const sat = 65 + 20 * t; // 65 → 85 + const light = 70 - 22 * t; // 70 → 48 (darker = better) + return `hsl(${hue}, ${sat}%, ${light}%)`; +} + +function HeatmapCell({ + cat, + asp, + rows, + embeddings, +}: { + cat: string; + asp: string; + rows: BenchmarkRow[]; + embeddings: BenchmarkEmbedding[]; +}) { + const tone = ASPECT_TONE[asp] ?? ASPECT_TONE.MFO; + const empty = rows.length === 0; + + return ( +
+
+
+ {cat} + · + {asp} +
+ + {ASPECT_LABELS[asp] ?? ""} + +
+
+ {empty ? ( +

No data

+ ) : ( +
    + {rows.map((r, i) => { + const emb = embeddings.find((e) => e.id === r.embedding_config_id); + const name = emb?.display_name ?? r.embedding_config_id.slice(0, 6); + const isWinner = i === 0; + return ( +
  • +
    + {isWinner && ( + 🥇 + )} + {name} +
    +
    +
    + {/* Reserved slot for bootstrap CI whisker — see roadmap */} +
    + + {r.fmax.toFixed(3)} + +
  • + ); + })} +
+ )} +
+
+ ); +} + +export function BenchmarkHeatmap({ + rows, + embeddings, + categories, + aspects, + embeddingFilter, +}: BenchmarkHeatmapProps) { + const grid = useMemo(() => { + const map = new Map(); + for (const r of rows) { + if (embeddingFilter && !embeddingFilter.has(r.embedding_config_id)) continue; + const k = `${r.category}|${r.aspect}`; + const arr = map.get(k); + if (arr) arr.push(r); + else map.set(k, [r]); + } + return map; + }, [rows, embeddingFilter]); + + const aspectsCount = aspects.length; + const cols = + aspectsCount === 3 ? "lg:grid-cols-3" : aspectsCount === 2 ? "lg:grid-cols-2" : "lg:grid-cols-1"; + + return ( +
+
+
+

+ 📊 + Per-cell heatmap · 8 PLMs × {categories.length} categories × {aspectsCount} aspects +

+

+ Each card shows the best Fmax per embedding within the active selection. Bars sorted descending; leader marked. +

+
+
+ Fmax + + 0.0 + + 1.0 +
+
+
+ {categories.map((cat) => + aspects.map((asp) => ( + + )), + )} +
+

+ Hover any bar for stage / K / Fmax detail. CI whiskers will render + in the same slot once bootstrap intervals are persisted. +

+
+ ); +}