diff --git a/apps/web/app/[locale]/benchmark/page.tsx b/apps/web/app/[locale]/benchmark/page.tsx index b7f4aa9..9b829b9 100644 --- a/apps/web/app/[locale]/benchmark/page.tsx +++ b/apps/web/app/[locale]/benchmark/page.tsx @@ -560,7 +560,39 @@ export default function BenchmarkPage() { )} - {/* Matrix table */} + {/* View toggle: Heatmap (default) | Table */} + {hasData && ( +
+
+ {(["heatmap", "table"] as const).map((mode) => { + const active = viewMode === mode; + return ( + + ); + })} +
+

+ {viewMode === "heatmap" + ? "Visual ranking per cell. Bars sorted by Fmax." + : "Full matrix with raw numbers. Useful for export."} +

+
+ )} + + {/* Matrix view */} {!hasData ? (

@@ -572,6 +604,13 @@ export default function BenchmarkPage() { this cell of the matrix.

+ ) : viewMode === "heatmap" ? ( + ) : (
diff --git a/apps/web/components/BenchmarkHeatmap.tsx b/apps/web/components/BenchmarkHeatmap.tsx
new file mode 100644
index 0000000..bd2eef8
--- /dev/null
+++ b/apps/web/components/BenchmarkHeatmap.tsx
@@ -0,0 +1,200 @@
+"use client";
+
+import { useMemo } from "react";
+import type { BenchmarkEmbedding, BenchmarkRow } from "@/lib/api";
+
+/**
+ * Small-multiples heatmap for the benchmark matrix.
+ *
+ * Layout: a (categories × aspects) grid of compact cards. Inside each
+ * card, one horizontal bar per embedding, sorted by Fmax descending.
+ * Bar width is proportional to Fmax; color reads on a fixed gradient
+ * so the eye can scan rank quickly. The leader gets a subtle medal.
+ *
+ * The space marked CI is reserved — when bootstrap CIs are persisted
+ * we'll render a `±` whisker without changing the cell layout.
+ */
+export type BenchmarkHeatmapProps = {
+  rows: BenchmarkRow[];
+  embeddings: BenchmarkEmbedding[];
+  categories: string[];
+  aspects: string[];
+  /** Optional: hide rows whose embedding isn't in this set. */
+  embeddingFilter?: Set<string> | null;
+};
+
+/** Tailwind class triplet (ring / background / text) used to tint a card per aspect code. */
+const ASPECT_TONE: Record<string, { ring: string; bg: string; text: string }> = {
+  MFO: { ring: "ring-blue-100", bg: "bg-blue-50/60", text: "text-blue-700" },
+  BPO: { ring: "ring-violet-100", bg: "bg-violet-50/60", text: "text-violet-700" },
+  CCO: { ring: "ring-emerald-100", bg: "bg-emerald-50/60", text: "text-emerald-700" },
+};
+
+/** Long-form label for each aspect code. */
+const ASPECT_LABELS: Record<string, string> = {
+  MFO: "Molecular Function",
+  BPO: "Biological Process",
+  CCO: "Cellular Component",
+};
+
+/** Long-form label for each category code. */
+const CATEGORY_LABELS: Record<string, string> = {
+  NK: "No Knowledge",
+  LK: "Limited Knowledge",
+  PK: "Partial Knowledge",
+};
+
+/** Pick the best Fmax row per embedding inside a single cell. The matrix
+ * endpoint already dedupes per (eid, esid, st, k, cat, asp) — collapse
+ * further to one row per embedding for the visualization.
*/ +function bestRowsByEmbedding(rows: BenchmarkRow[]): BenchmarkRow[] { + const best = new Map(); + for (const r of rows) { + const cur = best.get(r.embedding_config_id); + if (cur == null || r.fmax > cur.fmax) best.set(r.embedding_config_id, r); + } + return Array.from(best.values()).sort((a, b) => b.fmax - a.fmax); +} + +/** Linear interpolation between two HSL colors. Returns a CSS color. */ +function fmaxToColor(fmax: number): string { + // 0 โ†’ cool blue, 1 โ†’ deep violet (perceptually rising) + const t = Math.max(0, Math.min(1, fmax)); + const hue = 220 - 50 * t; // 220 (blue) โ†’ 270 (violet) + const sat = 65 + 20 * t; // 65 โ†’ 85 + const light = 70 - 22 * t; // 70 โ†’ 48 (darker = better) + return `hsl(${hue}, ${sat}%, ${light}%)`; +} + +function HeatmapCell({ + cat, + asp, + rows, + embeddings, +}: { + cat: string; + asp: string; + rows: BenchmarkRow[]; + embeddings: BenchmarkEmbedding[]; +}) { + const tone = ASPECT_TONE[asp] ?? ASPECT_TONE.MFO; + const empty = rows.length === 0; + + return ( +
+
+
+ {cat} + · + {asp} +
+ + {ASPECT_LABELS[asp] ?? ""} + +
+
+ {empty ? ( +

No data

+ ) : ( +
    + {rows.map((r, i) => { + const emb = embeddings.find((e) => e.id === r.embedding_config_id); + const name = emb?.display_name ?? r.embedding_config_id.slice(0, 6); + const isWinner = i === 0; + return ( +
  • +
    + {isWinner && ( + 🥇 + )} + {name} +
    +
    +
    + {/* Reserved slot for bootstrap CI whisker — see roadmap */} +
    + + {r.fmax.toFixed(3)} + +
  • + ); + })} +
+ )} +
+
+  );
+}
+
+/**
+ * Renders the benchmark matrix as a grid of per-(category, aspect) cards.
+ *
+ * Rows are bucketed by `category|aspect`; rows whose embedding is not in
+ * `embeddingFilter` (when one is given) are dropped before bucketing.
+ */
+export function BenchmarkHeatmap({
+  rows,
+  embeddings,
+  categories,
+  aspects,
+  embeddingFilter,
+}: BenchmarkHeatmapProps) {
+  const grid = useMemo(() => {
+    const map = new Map<string, BenchmarkRow[]>();
+    for (const r of rows) {
+      if (embeddingFilter && !embeddingFilter.has(r.embedding_config_id)) continue;
+      const k = `${r.category}|${r.aspect}`;
+      const arr = map.get(k);
+      if (arr) arr.push(r);
+      else map.set(k, [r]);
+    }
+    return map;
+  }, [rows, embeddingFilter]);
+
+  const aspectsCount = aspects.length;
+  // Number of embeddings that survive the optional filter; shown in the header
+  // instead of a hard-coded count so the caption cannot drift from the data.
+  const embeddingCount = embeddingFilter
+    ? embeddings.filter((e) => embeddingFilter.has(e.id)).length
+    : embeddings.length;
+  const cols =
+    aspectsCount === 3 ? "lg:grid-cols-3" : aspectsCount === 2 ? "lg:grid-cols-2" : "lg:grid-cols-1";
+
+  return (
+
+
+
+

+ 📊 + Per-cell heatmap · {embeddingCount} PLMs × {categories.length} categories × {aspectsCount} aspects

+

+ Each card shows the best Fmax per embedding within the active selection. Bars sorted descending; leader marked.

+
+
+ Fmax + + 0.0 + → + 1.0
+
+
+ {categories.map((cat) => + aspects.map((asp) => ( + + )), + )}
+

+ Hover any bar for stage / K / Fmax detail. CI whiskers will render + in the same slot once bootstrap intervals are persisted.

+
+  );
+}