Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 71 additions & 80 deletions PLAN.md

Large diffs are not rendered by default.

152 changes: 77 additions & 75 deletions TODO.md

Large diffs are not rendered by default.

85 changes: 72 additions & 13 deletions lib/cortex/Query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { EmbeddingRunner } from "../embeddings/EmbeddingRunner";
import { runPromotionSweep } from "../core/SalienceEngine";
import { computeSubgraphBounds } from "../core/HotpathPolicy";
import type { QueryResult } from "./QueryResult";
import { rankPages, spillToWarm } from "./Ranking";
import { rankPages, rankBooks, rankVolumes, rankShelves, spillToWarm, type RankedResult } from "./Ranking";
import { buildMetroid } from "./MetroidBuilder";
import { detectKnowledgeGap } from "./KnowledgeGapDetector";
import { solveOpenTSP } from "./OpenTSPSolver";
Expand Down Expand Up @@ -46,15 +46,75 @@ export async function query(

const rankingOptions = { vectorStore, metadataStore };

// --- HOT path: score resident pages ---
const hotpathEntries = await metadataStore.getHotpathEntries("page");
const hotpathIds = hotpathEntries.map((e) => e.entityId);
// --- Hierarchical routing: Shelf → Volume → Book → Page ---
// When higher-tier hotpath entries exist, we route through the hierarchy
// to narrow the candidate set before flat page scoring.
const hotpathShelfEntries = await metadataStore.getHotpathEntries("shelf");
const hotpathVolumeEntries = await metadataStore.getHotpathEntries("volume");
const hotpathBookEntries = await metadataStore.getHotpathEntries("book");
const hotpathPageEntries = await metadataStore.getHotpathEntries("page");

// Shelf drill-down → discover volume candidates
const volumeIdsFromShelves = new Set<Hash>();
if (hotpathShelfEntries.length > 0) {
const topShelves = await rankShelves(
queryEmbedding,
hotpathShelfEntries.map((e) => e.entityId),
Math.max(2, Math.ceil(hotpathShelfEntries.length / 2)),
rankingOptions,
);
for (const s of topShelves) {
for (const vid of s.childIds) volumeIdsFromShelves.add(vid);
}
}

// Volume ranking → discover book candidates
const volumeCandidateIds = new Set<Hash>([
...hotpathVolumeEntries.map((e) => e.entityId),
...volumeIdsFromShelves,
]);

const bookIdsFromVolumes = new Set<Hash>();
if (volumeCandidateIds.size > 0) {
const topVolumes = await rankVolumes(
queryEmbedding,
[...volumeCandidateIds],
Math.max(2, Math.ceil(volumeCandidateIds.size / 2)),
rankingOptions,
);
for (const v of topVolumes) {
for (const bid of v.childIds) bookIdsFromVolumes.add(bid);
}
}

const hotResults = await rankPages(queryEmbedding, hotpathIds, topK, rankingOptions);
// Book ranking → discover page candidates
const bookCandidateIds = new Set<Hash>([
...hotpathBookEntries.map((e) => e.entityId),
...bookIdsFromVolumes,
]);

const pageIdsFromBooks = new Set<Hash>();
if (bookCandidateIds.size > 0) {
const topBooks = await rankBooks(
queryEmbedding,
[...bookCandidateIds],
Math.max(2, Math.ceil(bookCandidateIds.size / 2)),
rankingOptions,
);
for (const b of topBooks) {
for (const pid of b.childIds) pageIdsFromBooks.add(pid);
}
}

// --- HOT path: score resident pages merged with hierarchy-discovered pages ---
const hotpathIds = hotpathPageEntries.map((e) => e.entityId);
const combinedPageIds = new Set<Hash>([...hotpathIds, ...pageIdsFromBooks]);

const hotResults = await rankPages(queryEmbedding, [...combinedPageIds], topK, rankingOptions);
const seenIds = new Set(hotResults.map((r) => r.id));

// --- Warm spill: fill up to topK if hot path is insufficient ---
let warmResults: Array<{ id: Hash; score: number }> = [];
let warmResults: RankedResult[] = [];
if (hotResults.length < topK) {
const allWarm = await spillToWarm("page", queryEmbedding, topK, rankingOptions);
warmResults = allWarm.filter((r) => !seenIds.has(r.id));
Expand All @@ -75,8 +135,7 @@ export async function query(
.map((r) => r.score);

// --- MetroidBuilder: build dialectical probe ---
// Candidates: hotpath book medoid pages + hotpath pages themselves
const hotpathBookEntries = await metadataStore.getHotpathEntries("book");
// Candidates: hotpath book medoid pages + top-ranked pages
const bookCandidates = (
await Promise.all(
hotpathBookEntries.map(async (e) => {
Expand Down Expand Up @@ -121,16 +180,16 @@ export async function query(

// --- Subgraph expansion ---
// Use dynamic Williams-derived bounds unless the caller has pinned an
// explicit maxHops value. Only load all pages when we actually need to
// compute bounds — skip the full-page scan on the hot path when maxHops is
// already known.
// explicit maxHops value. Prefer the hotpath resident count as an efficient
// proxy for corpus size to avoid scanning all pages on the hot path.
const topPageIds = topPages.map((p) => p.pageId);
let effectiveMaxHops: number;
if (options.maxHops !== undefined) {
effectiveMaxHops = options.maxHops;
} else {
const allPages = await metadataStore.getAllPages();
effectiveMaxHops = computeSubgraphBounds(allPages.length).maxHops;
const residentCount = await metadataStore.getResidentCount();
const graphMass = residentCount > 0 ? residentCount : combinedPageIds.size;
effectiveMaxHops = computeSubgraphBounds(Math.max(1, graphMass)).maxHops;
}
const subgraph = await metadataStore.getInducedNeighborSubgraph(topPageIds, effectiveMaxHops);

Expand Down
44 changes: 27 additions & 17 deletions lib/cortex/Ranking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,42 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}

/**
 * One scored entry as returned by the ranking helpers in this module
 * (`rankShelves`, `rankVolumes`, `rankBooks`, `rankPages`, `spillToWarm`).
 */
export interface RankedResult {
/** Identifier of the ranked entity (shelf, volume, book or page). */
id: Hash;
/** Cosine similarity between the query embedding and the entity's representative vector. */
score: number;
/**
 * Child IDs from the ranked entity: `volumeIds` for a shelf, `bookIds` for a
 * volume, `pageIds` for a book. Page-level results carry an empty array.
 */
childIds: Hash[];
}

/**
 * Returns the `k` best-scoring entries, highest score first.
 *
 * Ordering is deterministic: entries with equal scores are ordered by
 * `id.localeCompare`, and entries whose score is `NaN` (e.g. a cosine
 * similarity computed against a zero-norm vector) are placed after every
 * entry with a real score instead of making the comparator inconsistent.
 *
 * The input array is left untouched; a sorted copy is returned.
 *
 * @param scored - Candidates to rank. Not mutated.
 * @param k - Maximum number of entries to return. Values that are not
 *   greater than zero (including `NaN`) yield an empty array.
 * @returns A new array with at most `k` entries.
 */
function pickTopK<T extends { id: string; score: number }>(
  scored: readonly T[],
  k: number,
): T[] {
  // slice(0, k) with a negative k would return "all but the last |k|" entries,
  // which is never what a top-k caller wants.
  if (!(k > 0)) return [];

  const byScoreThenId = (a: T, b: T): number => {
    const aIsNaN = Number.isNaN(a.score);
    const bIsNaN = Number.isNaN(b.score);
    // Exactly one NaN: the NaN entry ranks last.
    if (aIsNaN !== bIsNaN) return aIsNaN ? 1 : -1;
    // Both real and different: higher score first. The inequality check also
    // avoids Infinity - Infinity producing NaN.
    if (!aIsNaN && a.score !== b.score) return b.score - a.score;
    // Equal scores (or both NaN): stable, deterministic tie-break on id.
    return a.id.localeCompare(b.id);
  };

  // Copy before sorting so the caller's array keeps its original order.
  return [...scored].sort(byScoreThenId).slice(0, k);
}

/**
* Ranks shelves by cosine similarity of their routing prototype to the query.
* Uses routingPrototypeOffsets[0] as the representative vector.
* Returns child volumeIds alongside each scored shelf.
*/
export async function rankShelves(
queryEmbedding: Float32Array,
residentShelfIds: Hash[],
topK: number,
options: RankingOptions,
): Promise<Array<{ id: Hash; score: number }>> {
): Promise<RankedResult[]> {
if (residentShelfIds.length === 0) return [];

const { vectorStore, metadataStore } = options;
const scored: Array<{ id: Hash; score: number }> = [];
const scored: RankedResult[] = [];

for (const shelfId of residentShelfIds) {
const shelf = await metadataStore.getShelf(shelfId);
if (!shelf || shelf.routingPrototypeOffsets.length === 0) continue;
const vec = await vectorStore.readVector(shelf.routingPrototypeOffsets[0], shelf.routingDim);
scored.push({ id: shelfId, score: cosineSimilarity(queryEmbedding, vec) });
scored.push({ id: shelfId, score: cosineSimilarity(queryEmbedding, vec), childIds: shelf.volumeIds });
}

return pickTopK(scored, topK);
Expand All @@ -57,49 +65,51 @@ export async function rankShelves(
/**
 * Scores candidate volumes against the query and keeps the best `topK`.
 *
 * Each volume is represented by the vector read from `prototypeOffsets[0]`
 * (with `prototypeDim`); its score is the cosine similarity between that
 * vector and `queryEmbedding`. Volumes that cannot be loaded, or that have no
 * prototype offsets, are left out. Every result carries the volume's
 * `bookIds` as `childIds`.
 */
export async function rankVolumes(
  queryEmbedding: Float32Array,
  residentVolumeIds: Hash[],
  topK: number,
  options: RankingOptions,
): Promise<RankedResult[]> {
  if (residentVolumeIds.length === 0) {
    return [];
  }

  const { vectorStore, metadataStore } = options;
  const candidates: RankedResult[] = [];

  // Volumes are looked up and scored one at a time, in input order.
  for (const id of residentVolumeIds) {
    const record = await metadataStore.getVolume(id);
    if (!record) continue;
    if (record.prototypeOffsets.length === 0) continue;

    const prototype = await vectorStore.readVector(record.prototypeOffsets[0], record.prototypeDim);
    const similarity = cosineSimilarity(queryEmbedding, prototype);
    candidates.push({ id, score: similarity, childIds: record.bookIds });
  }

  return pickTopK(candidates, topK);
}

/**
* Ranks books by cosine similarity of their medoid page embedding to the query.
* Returns child pageIds alongside each scored book.
*/
export async function rankBooks(
queryEmbedding: Float32Array,
residentBookIds: Hash[],
topK: number,
options: RankingOptions,
): Promise<Array<{ id: Hash; score: number }>> {
): Promise<RankedResult[]> {
if (residentBookIds.length === 0) return [];

const { vectorStore, metadataStore } = options;
const scored: Array<{ id: Hash; score: number }> = [];
const scored: RankedResult[] = [];

for (const bookId of residentBookIds) {
const book = await metadataStore.getBook(bookId);
if (!book) continue;
const medoidPage = await metadataStore.getPage(book.medoidPageId);
if (!medoidPage) continue;
const vec = await vectorStore.readVector(medoidPage.embeddingOffset, medoidPage.embeddingDim);
scored.push({ id: bookId, score: cosineSimilarity(queryEmbedding, vec) });
scored.push({ id: bookId, score: cosineSimilarity(queryEmbedding, vec), childIds: book.pageIds });
}

return pickTopK(scored, topK);
Expand All @@ -113,17 +123,17 @@ export async function rankPages(
residentPageIds: Hash[],
topK: number,
options: RankingOptions,
): Promise<Array<{ id: Hash; score: number }>> {
): Promise<RankedResult[]> {
if (residentPageIds.length === 0) return [];

const { vectorStore, metadataStore } = options;
const scored: Array<{ id: Hash; score: number }> = [];
const scored: RankedResult[] = [];

for (const pageId of residentPageIds) {
const page = await metadataStore.getPage(pageId);
if (!page) continue;
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
scored.push({ id: pageId, score: cosineSimilarity(queryEmbedding, vec) });
scored.push({ id: pageId, score: cosineSimilarity(queryEmbedding, vec), childIds: [] });
}

return pickTopK(scored, topK);
Expand All @@ -139,17 +149,17 @@ export async function spillToWarm(
queryEmbedding: Float32Array,
topK: number,
options: RankingOptions,
): Promise<Array<{ id: Hash; score: number }>> {
): Promise<RankedResult[]> {
if (tier !== "page") return [];

const { vectorStore, metadataStore } = options;
const allPages = await metadataStore.getAllPages();
if (allPages.length === 0) return [];

const scored: Array<{ id: Hash; score: number }> = [];
const scored: RankedResult[] = [];
for (const page of allPages) {
const vec = await vectorStore.readVector(page.embeddingOffset, page.embeddingDim);
scored.push({ id: page.pageId, score: cosineSimilarity(queryEmbedding, vec) });
scored.push({ id: page.pageId, score: cosineSimilarity(queryEmbedding, vec), childIds: [] });
}

return pickTopK(scored, topK);
Expand Down
4 changes: 2 additions & 2 deletions lib/daydreamer/ClusterStability.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ export interface LabelPropagationResult {
/**
* Run one pass of label propagation over all pages.
*
* Each node adopts the most frequent label among its Metroid neighbors.
* Each node adopts the most frequent label among its semantic neighbors.
* Ties are broken deterministically by choosing the lexicographically
* smallest label (consistent across runs and nodes).
*
Expand Down Expand Up @@ -107,7 +107,7 @@ async function propagationPass(

/**
* Assign community labels to all pages via label propagation on the
* Metroid (semantic) neighbor graph.
* semantic neighbor graph.
*
* Initial labels: each page is its own community (pageId as initial label).
* Each iteration: every node adopts the most frequent label among neighbors.
Expand Down
11 changes: 7 additions & 4 deletions lib/daydreamer/FullNeighborRecalc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// ---------------------------------------------------------------------------

import type { Hash, MetadataStore, SemanticNeighbor, Page, VectorStore } from "../core/types";
import { computeCapacity, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
import { computeCapacity, computeNeighborMaxDegree, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine";

// Minimum pair budget per idle recalc cycle.
Expand All @@ -31,7 +31,8 @@ export interface FullNeighborRecalcOptions {
metadataStore: MetadataStore;
vectorStore: VectorStore;
policy?: HotpathPolicy;
/** Maximum Metroid neighbors stored per page. Default: 16. */
/** Maximum semantic neighbors stored per page.
* When omitted, uses Williams-derived `computeNeighborMaxDegree(graphMass)`. */
maxNeighbors?: number;
/** Current timestamp (ms since epoch). Defaults to Date.now(). */
now?: number;
Expand Down Expand Up @@ -71,7 +72,7 @@ function cosineSimilarity(a: Float32Array, b: Float32Array): number {
*
* Finds all volumes flagged as dirty (via `needsNeighborRecalc`), loads
* their pages, computes pairwise cosine similarities, and updates the
* Metroid neighbor index. Processing is bounded by the Williams-Bound-derived
* semantic neighbor index. Processing is bounded by the Williams-Bound-derived
* maintenance budget to avoid blocking the idle loop.
*
* After recalculation, salience is recomputed for affected pages and a
Expand All @@ -84,7 +85,6 @@ export async function runFullNeighborRecalc(
metadataStore,
vectorStore,
policy = DEFAULT_HOTPATH_POLICY,
maxNeighbors = 16,
now = Date.now(),
} = options;

Expand All @@ -110,6 +110,9 @@ export async function runFullNeighborRecalc(
const totalGraphMass = (await metadataStore.getAllPages()).length;
const pairBudget = Math.max(MIN_RECALC_PAIR_BUDGET, computeCapacity(totalGraphMass, policy.c));

// Derive max neighbor degree from Williams bounds if not explicitly provided.
const maxNeighbors = options.maxNeighbors ?? computeNeighborMaxDegree(totalGraphMass, policy.c);

let totalVolumesProcessed = 0;
let totalPagesProcessed = 0;
let totalPairsComputed = 0;
Expand Down
9 changes: 6 additions & 3 deletions lib/daydreamer/HebbianUpdater.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// ---------------------------------------------------------------------------

import type { Edge, Hash, MetadataStore } from "../core/types";
import { DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
import { computeNeighborMaxDegree, DEFAULT_HOTPATH_POLICY, type HotpathPolicy } from "../core/HotpathPolicy";
import { batchComputeSalience, runPromotionSweep } from "../core/SalienceEngine";

// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -43,7 +43,8 @@ export interface HebbianUpdaterOptions {
ltdDecay?: number;
/** Prune edges whose weight drops below this value. Default: DEFAULT_PRUNE_THRESHOLD. */
pruneThreshold?: number;
/** Maximum outgoing degree per node. Default: DEFAULT_MAX_DEGREE. */
/** Maximum outgoing Hebbian edges per node.
* When omitted, uses Williams-derived `computeNeighborMaxDegree(graphMass)`. */
maxDegree?: number;
/** Current timestamp (ms since epoch). Defaults to Date.now(). */
now?: number;
Expand Down Expand Up @@ -135,13 +136,15 @@ export async function decayAndPrune(
policy = DEFAULT_HOTPATH_POLICY,
ltdDecay = DEFAULT_LTD_DECAY,
pruneThreshold = DEFAULT_PRUNE_THRESHOLD,
maxDegree = DEFAULT_MAX_DEGREE,
now = Date.now(),
} = options;

const allPages = await metadataStore.getAllPages();
if (allPages.length === 0) return { decayed: 0, pruned: 0 };

// Derive max degree from Williams bounds if not explicitly provided.
const maxDegree = options.maxDegree ?? computeNeighborMaxDegree(allPages.length, policy.c);

const changedNodeIds = new Set<Hash>();
let totalDecayed = 0;
let totalPruned = 0;
Expand Down
Loading
Loading