diff --git a/tests/khive-contract/fixtures/memories_corpus_v2.json b/tests/khive-contract/fixtures/memories_corpus_v2.json new file mode 100644 index 00000000..ed6212e5 --- /dev/null +++ b/tests/khive-contract/fixtures/memories_corpus_v2.json @@ -0,0 +1,2239 @@ +{ + "_meta": { + "version": "v2", + "description": "Harder eval corpus for recall tuning. Synonym queries, partial matches, distractors, 5+ domains, ground-truth expected_top_k. Includes importance_trap queries to discriminate importance_weight parameter.", + "memory_count": 203, + "query_count": 48, + "domains": ["ml", "biology", "programming", "history", "geography", "cooking", "medicine", "physics", "economics", "linguistics"], + "distractor_count": 46, + "distractor_ratio": "~22.7% (46 of 203 memories have no query relevance)", + "query_types": { + "synonym": "queries using synonyms not present verbatim in memory content", + "partial": "queries where only some terms appear in target memory", + "importance_trap": "queries with unique terms matching low-importance memories; discriminates importance_weight parameter" + } + }, + "memories": [ + { + "id": "mem_001", + "content": "Backpropagation computes gradients by applying the chain rule through each layer of a neural network, enabling weight updates via gradient descent", + "domain": "ml", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "neural-networks", "optimization"] + }, + { + "id": "mem_002", + "content": "Stochastic gradient descent updates model parameters using the gradient computed on a random mini-batch rather than the full training dataset", + "domain": "ml", + "importance": 0.88, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "optimization"] + }, + { + "id": "mem_003", + "content": "Adam optimizer combines momentum and adaptive learning rates for each parameter, converging faster than vanilla SGD on most deep learning tasks", + "domain": "ml", + "importance": 0.87, + 
"decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "optimization"] + }, + { + "id": "mem_004", + "content": "Attention mechanisms allow neural networks to selectively focus on relevant parts of the input sequence when producing each output token", + "domain": "ml", + "importance": 0.92, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "transformers", "attention"] + }, + { + "id": "mem_005", + "content": "The transformer architecture uses multi-head self-attention and feedforward layers to process sequences without recurrence, enabling parallelization", + "domain": "ml", + "importance": 0.95, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "transformers"] + }, + { + "id": "mem_006", + "content": "Dropout regularization randomly sets a fraction of activations to zero during training to reduce overfitting in deep neural networks", + "domain": "ml", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "regularization"] + }, + { + "id": "mem_007", + "content": "Batch normalization normalizes layer activations to have zero mean and unit variance, stabilizing training and enabling higher learning rates", + "domain": "ml", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "normalization"] + }, + { + "id": "mem_008", + "content": "Convolutional neural networks use learnable filters that slide over spatial input to detect local patterns, forming the backbone of computer vision models", + "domain": "ml", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "cnn", "computer-vision"] + }, + { + "id": "mem_009", + "content": "Recurrent neural networks maintain hidden state across time steps to model sequential dependencies in variable-length input sequences", + "domain": "ml", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "rnn", "sequence"] + }, + { + "id": 
"mem_010", + "content": "Long short-term memory networks solve the vanishing gradient problem in RNNs by using gating mechanisms to control information flow over long sequences", + "domain": "ml", + "importance": 0.87, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "lstm", "sequence"] + }, + { + "id": "mem_011", + "content": "Transfer learning fine-tunes a pretrained model on a new task, leveraging representations learned from a large corpus to achieve better performance with less data", + "domain": "ml", + "importance": 0.88, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "transfer-learning"] + }, + { + "id": "mem_012", + "content": "Embedding layers map discrete tokens to dense vectors in a continuous space, capturing semantic relationships between words or categories", + "domain": "ml", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "embeddings", "nlp"] + }, + { + "id": "mem_013", + "content": "Reinforcement learning trains agents by rewarding desired behaviors and penalizing undesired ones through interaction with an environment", + "domain": "ml", + "importance": 0.87, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "reinforcement-learning"] + }, + { + "id": "mem_014", + "content": "Cross-entropy loss measures the divergence between predicted probability distributions and true labels, commonly used for classification tasks", + "domain": "ml", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "loss-functions"] + }, + { + "id": "mem_015", + "content": "Hyperparameter tuning searches over learning rate, batch size, and network architecture to find configurations that maximize validation performance", + "domain": "ml", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "hyperparameters"] + }, + { + "id": "mem_016", + "content": "Mitochondria are the powerhouse of the 
cell, generating ATP through oxidative phosphorylation in the electron transport chain", + "domain": "biology", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "cell", "energy"] + }, + { + "id": "mem_017", + "content": "Cellular respiration converts glucose and oxygen into carbon dioxide, water, and ATP energy through glycolysis, the Krebs cycle, and oxidative phosphorylation", + "domain": "biology", + "importance": 0.88, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["biology", "metabolism", "energy"] + }, + { + "id": "mem_018", + "content": "Messenger RNA carries the genetic blueprint from DNA in the nucleus to ribosomes in the cytoplasm where proteins are synthesized", + "domain": "biology", + "importance": 0.87, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["biology", "genetics", "protein-synthesis"] + }, + { + "id": "mem_019", + "content": "Natural selection favors individuals with heritable traits that increase survival and reproductive success in a given environment", + "domain": "biology", + "importance": 0.88, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["biology", "evolution"] + }, + { + "id": "mem_020", + "content": "Enzymes are biological catalysts that speed up chemical reactions by lowering activation energy without being consumed in the process", + "domain": "biology", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "biochemistry", "enzymes"] + }, + { + "id": "mem_021", + "content": "The immune system distinguishes self from non-self through antigen presentation, T-cell activation, and antibody-mediated humoral immunity", + "domain": "biology", + "importance": 0.87, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "immunology"] + }, + { + "id": "mem_022", + "content": "Stem cells are undifferentiated cells capable of self-renewal and differentiation into specialized cell 
types such as neurons, muscle, or blood cells", + "domain": "biology", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "stem-cells"] + }, + { + "id": "mem_023", + "content": "The human genome contains approximately 3 billion base pairs encoding roughly 20,000 protein-coding genes distributed across 23 chromosome pairs", + "domain": "biology", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "genetics", "genome"] + }, + { + "id": "mem_024", + "content": "Epigenetic modifications such as DNA methylation and histone acetylation regulate gene expression without altering the underlying DNA sequence", + "domain": "biology", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "epigenetics", "gene-regulation"] + }, + { + "id": "mem_025", + "content": "Symbiosis describes the close ecological interaction between two species, which can be mutualistic, commensal, or parasitic in nature", + "domain": "biology", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "ecology"] + }, + { + "id": "mem_026", + "content": "Memory allocators like jemalloc and tcmalloc manage heap fragmentation and thread-local caches to reduce allocation latency in concurrent programs", + "domain": "programming", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "systems", "memory"] + }, + { + "id": "mem_027", + "content": "Async runtimes like Tokio schedule thousands of lightweight tasks onto a thread pool using cooperative multitasking and event-driven IO", + "domain": "programming", + "importance": 0.87, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "async", "rust"] + }, + { + "id": "mem_028", + "content": "Type inference allows compilers to deduce the types of expressions automatically without requiring explicit annotations from the 
programmer", + "domain": "programming", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "type-systems"] + }, + { + "id": "mem_029", + "content": "Monads in functional programming represent computations as values and allow sequencing effects such as IO, optional values, or error handling", + "domain": "programming", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "functional", "haskell"] + }, + { + "id": "mem_030", + "content": "Lock-free data structures use atomic compare-and-swap operations instead of mutexes to allow concurrent access without blocking threads", + "domain": "programming", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "concurrency", "lock-free"] + }, + { + "id": "mem_031", + "content": "Garbage collection automatically reclaims unreachable heap memory by tracing live object graphs from root pointers in languages like Java and Go", + "domain": "programming", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "memory", "gc"] + }, + { + "id": "mem_032", + "content": "Compiler intermediate representations like LLVM IR abstract over machine architectures, enabling target-independent optimizations like dead code elimination", + "domain": "programming", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "compilers", "llvm"] + }, + { + "id": "mem_033", + "content": "Cache coherence protocols like MESI ensure that processor caches remain consistent when multiple cores access shared memory locations", + "domain": "programming", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "systems", "cpu"] + }, + { + "id": "mem_034", + "content": "B-tree indexes in relational databases provide O(log n) search, insert, and delete by maintaining sorted order across 
balanced tree nodes on disk", + "domain": "programming", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "databases", "indexes"] + }, + { + "id": "mem_035", + "content": "Consensus algorithms like Raft elect a leader and replicate a log across a cluster to achieve fault-tolerant distributed agreement", + "domain": "programming", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "distributed-systems"] + }, + { + "id": "mem_036", + "content": "Event sourcing stores all state changes as an immutable append-only log of events rather than mutable current state in a relational table", + "domain": "programming", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "architecture", "event-sourcing"] + }, + { + "id": "mem_037", + "content": "The Peloponnesian War between Athens and Sparta lasted from 431 to 404 BC and ended with Athenian defeat after the Sicilian Expedition disaster", + "domain": "history", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "greece", "ancient"] + }, + { + "id": "mem_038", + "content": "The Byzantine Empire survived the fall of Rome for nearly a thousand years, preserving Greek and Roman culture in Constantinople until 1453", + "domain": "history", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "byzantine", "medieval"] + }, + { + "id": "mem_039", + "content": "The printing press invented by Gutenberg around 1440 enabled mass reproduction of texts, accelerating the Reformation and the Scientific Revolution", + "domain": "history", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "technology", "renaissance"] + }, + { + "id": "mem_040", + "content": "The Treaty of Westphalia in 1648 ended the Thirty Years War and established the modern system of sovereign 
nation-states in Europe", + "domain": "history", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "europe", "diplomacy"] + }, + { + "id": "mem_041", + "content": "The colonization of the Americas by European powers decimated indigenous populations through introduced diseases, conquest, and forced labor systems", + "domain": "history", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "colonization", "americas"] + }, + { + "id": "mem_042", + "content": "The Meiji Restoration of 1868 modernized Japan by abolishing the feudal samurai class and rapidly adopting Western technology and institutions", + "domain": "history", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "japan", "modernization"] + }, + { + "id": "mem_043", + "content": "The Cold War rivalry between the United States and Soviet Union drove the space race, proxy wars, and nuclear deterrence strategy from 1947 to 1991", + "domain": "history", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "cold-war", "20th-century"] + }, + { + "id": "mem_044", + "content": "The Berlin Wall construction in 1961 physically divided East and West Germany, becoming the defining symbol of Iron Curtain division in Europe", + "domain": "history", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "cold-war", "germany"] + }, + { + "id": "mem_045", + "content": "The Scramble for Africa between 1881 and 1914 resulted in European powers colonizing nearly the entire African continent for resources and strategic position", + "domain": "history", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "colonization", "africa"] + }, + { + "id": "mem_046", + "content": "The Sahara Desert is the world's largest hot desert spanning over 9 million square kilometers 
across Northern Africa with extreme temperature variation", + "domain": "geography", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "africa", "desert"] + }, + { + "id": "mem_047", + "content": "The Amazon River basin contains the largest tropical rainforest on Earth, housing an estimated 10% of all species on the planet", + "domain": "geography", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "south-america", "ecosystem"] + }, + { + "id": "mem_048", + "content": "Tectonic plates move at centimeter-per-year rates driven by convection currents in the mantle, causing earthquakes, volcanoes, and mountain formation", + "domain": "geography", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "geology", "tectonics"] + }, + { + "id": "mem_049", + "content": "The Himalayan mountain range formed when the Indian subcontinent collided with Eurasia approximately 50 million years ago, creating the world's highest peaks", + "domain": "geography", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "mountains", "asia"] + }, + { + "id": "mem_050", + "content": "Ocean currents like the Gulf Stream transport heat across vast distances, moderating coastal climates and influencing global weather patterns", + "domain": "geography", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "oceanography", "climate"] + }, + { + "id": "mem_051", + "content": "Permafrost in Arctic and subarctic regions stores vast quantities of frozen organic carbon that may release greenhouse gases as global temperatures rise", + "domain": "geography", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "climate", "arctic"] + }, + { + "id": "mem_052", + "content": "River deltas form where sediment carried by a river is 
deposited as the current slows upon reaching the sea, creating fertile agricultural land", + "domain": "geography", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "rivers", "geomorphology"] + }, + { + "id": "mem_053", + "content": "The Mediterranean Sea has a nearly enclosed basin with limited tidal range, making it saltier than the Atlantic due to high evaporation rates", + "domain": "geography", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "seas", "oceanography"] + }, + { + "id": "mem_054", + "content": "Maillard reaction between amino acids and reducing sugars at high temperatures produces hundreds of flavor compounds that give browned food its complex taste", + "domain": "cooking", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "chemistry", "flavor"] + }, + { + "id": "mem_055", + "content": "Fermentation by bacteria and yeast converts sugars into alcohol, lactic acid, or carbon dioxide, producing bread, beer, wine, yogurt, and kimchi", + "domain": "cooking", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "fermentation", "microbiology"] + }, + { + "id": "mem_056", + "content": "Emulsification combines fat and water into a stable mixture using an emulsifier like lecithin or mustard, creating sauces like hollandaise and mayonnaise", + "domain": "cooking", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "chemistry", "sauces"] + }, + { + "id": "mem_057", + "content": "Caramelization occurs when sugar is heated above 160 degrees Celsius, breaking down sucrose into simpler compounds with a characteristic amber color and toffee flavor", + "domain": "cooking", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "chemistry", "sugar"] + }, + { + "id": "mem_058", + "content": 
"Umami is the fifth basic taste detected by glutamate receptors, naturally found in aged cheeses, soy sauce, mushrooms, and dried fish", + "domain": "cooking", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "taste", "flavor"] + }, + { + "id": "mem_059", + "content": "Braising cooks tough cuts of meat slowly in liquid at low temperature, dissolving collagen into gelatin and creating a rich, tender result", + "domain": "cooking", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "techniques", "meat"] + }, + { + "id": "mem_060", + "content": "Gluten forms when wheat flour proteins glutenin and gliadin are hydrated and kneaded, creating an elastic network that traps fermentation gas in bread dough", + "domain": "cooking", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "baking", "chemistry"] + }, + { + "id": "mem_061", + "content": "Knife skills like julienne, chiffonade, and brunoise refer to specific cutting shapes and sizes that affect cook times and texture consistency", + "domain": "cooking", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "techniques", "preparation"] + }, + { + "id": "mem_062", + "content": "Spices like turmeric, cumin, coriander, and cardamom form the flavor backbone of South Asian cuisine and have documented antimicrobial properties", + "domain": "cooking", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "spices", "asian-cuisine"] + }, + { + "id": "mem_063", + "content": "Stock is made by simmering bones, aromatic vegetables, and herbs to extract collagen, gelatin, and mineral flavor into a rich base liquid", + "domain": "cooking", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "techniques", "sauces"] + }, + { + "id": "mem_064", + "content": "Insulin 
secreted by pancreatic beta cells lowers blood glucose by facilitating cellular uptake of glucose and inhibiting hepatic glucose production", + "domain": "medicine", + "importance": 0.88, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["medicine", "endocrinology", "diabetes"] + }, + { + "id": "mem_065", + "content": "Statins inhibit the HMG-CoA reductase enzyme to reduce hepatic cholesterol synthesis, lowering LDL levels and cardiovascular disease risk", + "domain": "medicine", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "pharmacology", "cardiology"] + }, + { + "id": "mem_066", + "content": "Antibiotic resistance arises when bacteria acquire mutations or resistance genes that inactivate drugs, making infections harder to treat", + "domain": "medicine", + "importance": 0.87, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["medicine", "microbiology", "public-health"] + }, + { + "id": "mem_067", + "content": "Herd immunity protects vulnerable populations when a sufficient proportion of a community is immune, preventing pathogen transmission chains", + "domain": "medicine", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "epidemiology", "vaccines"] + }, + { + "id": "mem_068", + "content": "Cognitive behavioral therapy helps patients identify and restructure maladaptive thought patterns and behaviors that maintain depression and anxiety", + "domain": "medicine", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "psychiatry", "therapy"] + }, + { + "id": "mem_069", + "content": "Pharmacokinetics describes how drugs are absorbed, distributed, metabolized, and excreted by the body, determining dosing intervals and therapeutic windows", + "domain": "medicine", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "pharmacology"] + }, + { + "id": 
"mem_070", + "content": "Magnetic resonance imaging uses radiofrequency pulses and magnetic fields to excite hydrogen nuclei, creating high-contrast soft tissue images", + "domain": "medicine", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "imaging", "diagnostics"] + }, + { + "id": "mem_071", + "content": "Synaptic plasticity—the strengthening and weakening of synaptic connections through long-term potentiation and depression—is the cellular basis of learning", + "domain": "medicine", + "importance": 0.87, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["medicine", "neuroscience", "memory"] + }, + { + "id": "mem_072", + "content": "Electroencephalography records electrical activity from scalp electrodes to detect seizure patterns, sleep stages, and brain oscillations", + "domain": "medicine", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "neuroscience", "diagnostics"] + }, + { + "id": "mem_073", + "content": "Coagulation cascade involves a series of protease activations that convert fibrinogen to fibrin, forming a blood clot to stop bleeding at injury sites", + "domain": "medicine", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "hematology"] + }, + { + "id": "mem_074", + "content": "Entropy in thermodynamics measures the number of accessible microstates of a system; the second law states that entropy of an isolated system never decreases", + "domain": "physics", + "importance": 0.87, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["physics", "thermodynamics"] + }, + { + "id": "mem_075", + "content": "Electromagnetic radiation spans wavelengths from gamma rays through X-rays, ultraviolet, visible light, infrared, and radio waves across the electromagnetic spectrum", + "domain": "physics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": 
["physics", "electromagnetism", "optics"] + }, + { + "id": "mem_076", + "content": "Wave-particle duality holds that quantum objects exhibit wave behavior in interference experiments and particle behavior in detection measurements", + "domain": "physics", + "importance": 0.87, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["physics", "quantum", "wave-particle"] + }, + { + "id": "mem_077", + "content": "Dark matter is a form of matter that does not emit electromagnetic radiation but exerts gravitational influence, comprising about 27% of the universe's mass-energy", + "domain": "physics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "astrophysics", "cosmology"] + }, + { + "id": "mem_078", + "content": "Superconductivity occurs in certain materials cooled below a critical temperature, allowing electrical current to flow with zero resistance", + "domain": "physics", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "condensed-matter", "superconductivity"] + }, + { + "id": "mem_079", + "content": "Inflation theory proposes that the universe underwent exponential expansion in the first fraction of a second after the Big Bang, explaining its observed homogeneity", + "domain": "physics", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "cosmology", "inflation"] + }, + { + "id": "mem_080", + "content": "Supply and demand curves intersect at the market equilibrium price where the quantity consumers want to buy equals what producers want to sell", + "domain": "economics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "microeconomics", "markets"] + }, + { + "id": "mem_081", + "content": "Inflation erodes purchasing power when the general price level rises over time, measured by indices like the consumer price index and producer price index", + "domain": 
"economics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "macroeconomics", "inflation"] + }, + { + "id": "mem_082", + "content": "Comparative advantage explains why nations trade by specializing in goods they produce at lower opportunity cost, even if one nation is more efficient at all goods", + "domain": "economics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "trade", "international"] + }, + { + "id": "mem_083", + "content": "Monetary policy conducted by central banks adjusts interest rates and money supply to influence inflation, employment, and economic growth", + "domain": "economics", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "macroeconomics", "central-banking"] + }, + { + "id": "mem_084", + "content": "Market failure occurs when free markets allocate resources inefficiently, as in the case of externalities, public goods, information asymmetry, or market power", + "domain": "economics", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "microeconomics", "market-failure"] + }, + { + "id": "mem_085", + "content": "Game theory models strategic interactions where agents choose actions that depend on expected responses from other rational actors", + "domain": "economics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "game-theory", "strategy"] + }, + { + "id": "mem_086", + "content": "Phonemes are the minimal contrastive sound units of a language; changing one phoneme can change the meaning of a word, as in bat versus cat", + "domain": "linguistics", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "phonology"] + }, + { + "id": "mem_087", + "content": "Syntactic trees represent the hierarchical constituency structure of sentences, showing how 
phrases are nested inside larger phrases", + "domain": "linguistics", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "syntax"] + }, + { + "id": "mem_088", + "content": "Linguistic relativity, or the Sapir-Whorf hypothesis, proposes that the language one speaks influences how one perceives and categorizes reality", + "domain": "linguistics", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "semantics", "cognition"] + }, + { + "id": "mem_089", + "content": "Language acquisition in children proceeds through cooing, babbling, one-word, and multi-word stages, with critical period effects on phonological learning", + "domain": "linguistics", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "acquisition", "development"] + }, + { + "id": "mem_090", + "content": "Loanwords enter a language when speakers adopt foreign vocabulary for new concepts, technologies, or cultural practices, often adapting pronunciation to native phonotactics", + "domain": "linguistics", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "lexicology", "contact"] + }, + { + "id": "mem_091", + "content": "Morphology studies how words are built from smaller meaningful units called morphemes, including roots, prefixes, and suffixes", + "domain": "linguistics", + "importance": 0.77, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "morphology"] + }, + { + "id": "mem_092", + "content": "Pragma was an ancient Greek word for action or affair; pragmatics studies how context shapes meaning beyond literal sentence semantics", + "domain": "linguistics", + "importance": 0.73, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["linguistics", "pragmatics"] + }, + { + "id": "mem_093", + "content": "Feature scaling standardizes input variables to a common range before 
training, preventing features with large numeric values from dominating gradient updates", + "domain": "ml", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "preprocessing"] + }, + { + "id": "mem_094", + "content": "Principal component analysis projects high-dimensional data onto orthogonal axes of maximum variance, enabling dimensionality reduction and visualization", + "domain": "ml", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "dimensionality-reduction"] + }, + { + "id": "mem_095", + "content": "Support vector machines find the maximum-margin hyperplane separating classes in a high-dimensional feature space, with kernel functions for nonlinear boundaries", + "domain": "ml", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "classification", "svm"] + }, + { + "id": "mem_096", + "content": "Random forests aggregate predictions from many decision trees trained on bootstrap samples and random feature subsets to reduce overfitting and variance", + "domain": "ml", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "ensemble", "decision-trees"] + }, + { + "id": "mem_097", + "content": "Variational autoencoders learn a compressed latent representation of data by encoding inputs into a probability distribution and reconstructing from samples", + "domain": "ml", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "generative", "vae"] + }, + { + "id": "mem_098", + "content": "The citric acid cycle, also called the Krebs cycle, oxidizes acetyl-CoA derived from glucose and fatty acids, producing NADH and FADH2 for ATP synthesis", + "domain": "biology", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "metabolism", "biochemistry"] + }, + { + "id": "mem_099", + "content": "Apoptosis is programmed cell death triggered 
by internal or external signals, removing damaged or unnecessary cells without causing inflammation", + "domain": "biology", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "cell", "death"] + }, + { + "id": "mem_100", + "content": "Vesicles transport cargo proteins between organelles in eukaryotic cells, with SNARE proteins mediating membrane fusion at the destination compartment", + "domain": "biology", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "cell", "transport"] + }, + { + "id": "mem_101", + "content": "Microservices architecture decomposes an application into small independently deployable services that communicate over APIs or message queues", + "domain": "programming", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "architecture", "microservices"] + }, + { + "id": "mem_102", + "content": "Tensor algebra represents multi-dimensional arrays and their operations, forming the computational foundation of modern deep learning frameworks", + "domain": "ml", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "math", "tensors"] + }, + { + "id": "mem_103", + "content": "Plate boundaries where two plates converge produce subduction zones, trenches, and arc volcanoes; divergent boundaries produce mid-ocean ridges and rift valleys", + "domain": "geography", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "geology", "tectonics"] + }, + { + "id": "mem_104", + "content": "Ancient Mesopotamia between the Tigris and Euphrates rivers developed the earliest writing system—cuneiform—along with the first legal codes and urban civilizations", + "domain": "history", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "ancient", "mesopotamia"] + }, + { + "id": "mem_105", + "content": "Sous 
vide cooking seals food in vacuum bags and cooks it in precisely temperature-controlled water baths, achieving uniform doneness impossible with conventional heat", + "domain": "cooking", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "techniques", "precision"] + }, + { + "id": "mem_106", + "content": "Nash equilibrium describes a stable outcome in strategic games where no player can benefit by unilaterally changing their strategy given others' choices", + "domain": "economics", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "game-theory"] + }, + { + "id": "mem_107", + "content": "Germ theory of disease, established by Pasteur and Koch, holds that infectious diseases are caused by microorganisms that invade the host", + "domain": "medicine", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "microbiology", "history"] + }, + { + "id": "mem_108", + "content": "Photovoltaic cells convert incident photons into electrical current when absorbed photons excite electrons across a semiconductor bandgap", + "domain": "physics", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "optics", "solar-energy"] + }, + { + "id": "mem_109", + "content": "The bond market prices sovereign debt using yield curves that reflect maturity, credit risk, and expected future interest rate movements", + "domain": "economics", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "finance", "bonds"] + }, + { + "id": "mem_110", + "content": "Semantic shift is the process by which a word gradually changes meaning over generations, often through metaphor, generalization, or specialization", + "domain": "linguistics", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "semantics", "change"] + }, + { + "id": 
"mem_111", + "content": "Bought a new mechanical keyboard today with Cherry MX brown switches; the tactile bump is perfect for long coding sessions without the noise of clicky switches", + "domain": "distractor", + "importance": 0.40, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "hardware"] + }, + { + "id": "mem_112", + "content": "The movie was entertaining but the ending felt rushed; the director should have given the third act at least another twenty minutes to resolve the subplot properly", + "domain": "distractor", + "importance": 0.35, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "entertainment"] + }, + { + "id": "mem_113", + "content": "Checked in for the flight at 5am; the airport coffee was terrible but the lounge had decent wifi so managed to finish the slide deck before boarding", + "domain": "distractor", + "importance": 0.30, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "travel"] + }, + { + "id": "mem_114", + "content": "The apartment landlord finally fixed the leaking pipe under the kitchen sink after three weeks of follow-up emails and one formal complaint letter", + "domain": "distractor", + "importance": 0.28, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "home"] + }, + { + "id": "mem_115", + "content": "Tried making sourdough for the first time; the starter is active after four days of feeding and the kitchen smells wonderfully yeasty and sour", + "domain": "distractor", + "importance": 0.38, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "cooking", "hobby"] + }, + { + "id": "mem_116", + "content": "Finished reading a novel about a lighthouse keeper in 1920s Newfoundland; the prose was beautifully sparse and the isolation felt viscerally real", + "domain": "distractor", + "importance": 0.32, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "reading"] + }, + { + "id": 
"mem_117", + "content": "The afternoon sun angle through the west window is now different enough from summer that I need to adjust my monitor position again to avoid glare", + "domain": "distractor", + "importance": 0.22, + "decay_factor": 0.07, + "memory_type": "episodic", + "tags": ["personal", "home"] + }, + { + "id": "mem_118", + "content": "Picked up the dry cleaning and noticed the collar on the blue shirt has a faint yellow stain that the cleaner missed; need to bring it back next week", + "domain": "distractor", + "importance": 0.18, + "decay_factor": 0.08, + "memory_type": "episodic", + "tags": ["personal", "errands"] + }, + { + "id": "mem_119", + "content": "The Wi-Fi router needed a firmware update; after flashing it the 5GHz band dropped connection randomly for two hours until a second reboot fixed it", + "domain": "distractor", + "importance": 0.32, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "hardware", "networking"] + }, + { + "id": "mem_120", + "content": "Saw a heron standing completely still in the creek behind the park for almost twenty minutes before it finally struck at something in the water", + "domain": "distractor", + "importance": 0.25, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "nature"] + }, + { + "id": "mem_121", + "content": "Tried a new ramen shop downtown; the broth was tonkotsu-style, rich and cloudy with a strong pork bone flavor, and the chashu was melt-in-your-mouth tender", + "domain": "distractor", + "importance": 0.35, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "food", "restaurant"] + }, + { + "id": "mem_122", + "content": "Signed up for a pottery class starting next month; always wanted to learn wheel throwing but never made the time before", + "domain": "distractor", + "importance": 0.28, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "hobby"] + }, + { + "id": "mem_123", + "content": "The plumber 
came to replace the water heater; the new one is tankless and should save about thirty percent on the monthly energy bill", + "domain": "distractor", + "importance": 0.25, + "decay_factor": 0.07, + "memory_type": "episodic", + "tags": ["personal", "home", "energy"] + }, + { + "id": "mem_124", + "content": "Ordered replacement brake pads for the bicycle; the organic pads wear faster than sintered but the modulation is much more predictable on wet descents", + "domain": "distractor", + "importance": 0.30, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "cycling"] + }, + { + "id": "mem_125", + "content": "The orchid on the windowsill bloomed for the third time this year; apparently repotting it into bark medium and reducing watering frequency was the key", + "domain": "distractor", + "importance": 0.22, + "decay_factor": 0.07, + "memory_type": "episodic", + "tags": ["personal", "plants"] + }, + { + "id": "mem_126", + "content": "Watched a documentary about deep-sea hydrothermal vents and the extremophile communities that thrive there without sunlight, driven entirely by chemosynthesis", + "domain": "distractor", + "importance": 0.40, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "science-interest"] + }, + { + "id": "mem_127", + "content": "Had coffee with an old colleague who now works on satellite telemetry; fascinating conversation about the latency tradeoffs in low-earth-orbit constellations", + "domain": "distractor", + "importance": 0.38, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "conversation"] + }, + { + "id": "mem_128", + "content": "The grocery delivery app sent the wrong brand of olive oil again; at least this one is also extra-virgin so the flavor difference is minimal", + "domain": "distractor", + "importance": 0.15, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "errands", "food"] + }, + { + "id": "mem_129", + "content": "Set up a new 
aquarium with a planted substrate and CO2 injection; the stem plants are already putting out visible new growth after just five days", + "domain": "distractor", + "importance": 0.28, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "hobby"] + }, + { + "id": "mem_130", + "content": "Photosynthesis in C4 plants like corn and sugarcane uses a carbon-concentrating mechanism that reduces photorespiration and improves water-use efficiency in hot climates", + "domain": "biology", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "plants", "photosynthesis"] + }, + { + "id": "mem_131", + "content": "Neural oscillations at gamma frequencies around 40Hz in the prefrontal cortex correlate with working memory maintenance and conscious attention", + "domain": "medicine", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "neuroscience", "cognition"] + }, + { + "id": "mem_132", + "content": "The Great Barrier Reef is the world's largest coral reef system, spanning 2,300 kilometers along the northeast Australian coast and hosting enormous marine biodiversity", + "domain": "geography", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "ocean", "australia"] + }, + { + "id": "mem_133", + "content": "Marginal cost is the additional cost incurred by producing one more unit of output; profit-maximizing firms produce until marginal cost equals marginal revenue", + "domain": "economics", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "microeconomics", "production"] + }, + { + "id": "mem_134", + "content": "Pitch accent systems assign high or low tones to syllables to distinguish word meaning, as in Japanese, Swedish, and some dialects of Serbian", + "domain": "linguistics", + "importance": 0.72, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": 
["linguistics", "phonology", "prosody"] + }, + { + "id": "mem_135", + "content": "The photoelectric effect—that light ejects electrons from metal surfaces only above a threshold frequency—led Einstein to propose the photon model of light", + "domain": "physics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "quantum", "history"] + }, + { + "id": "mem_136", + "content": "Immune checkpoint inhibitors block proteins like PD-1 and CTLA-4 that normally suppress T-cell activation, unleashing anti-tumor immune responses in cancer patients", + "domain": "medicine", + "importance": 0.85, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["medicine", "oncology", "immunotherapy"] + }, + { + "id": "mem_137", + "content": "The Coriolis effect causes moving air and water masses to deflect rightward in the northern hemisphere and leftward in the southern hemisphere due to Earth's rotation", + "domain": "geography", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "meteorology", "physics"] + }, + { + "id": "mem_138", + "content": "Dependency injection decouples classes from their dependencies by passing required objects through constructor parameters rather than creating them internally", + "domain": "programming", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "design-patterns", "oop"] + }, + { + "id": "mem_139", + "content": "Protectionism uses tariffs, quotas, and subsidies to shield domestic industries from foreign competition, often reducing allocative efficiency but preserving employment", + "domain": "economics", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "trade", "policy"] + }, + { + "id": "mem_140", + "content": "Pidgins form as simplified contact languages between speakers of mutually unintelligible tongues; when children acquire a pidgin as their 
first language it becomes a creole", + "domain": "linguistics", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["linguistics", "contact", "creolization"] + }, + { + "id": "mem_141", + "content": "Dropped my phone walking to the metro and cracked the screen protector; the display underneath seems fine but I should replace the protector before the cracks spread", + "domain": "distractor", + "importance": 0.20, + "decay_factor": 0.08, + "memory_type": "episodic", + "tags": ["personal", "errands"] + }, + { + "id": "mem_142", + "content": "The quarterly rent increase notice arrived; the landlord is citing rising property taxes and maintenance costs as justification for an eight percent hike", + "domain": "distractor", + "importance": 0.30, + "decay_factor": 0.06, + "memory_type": "episodic", + "tags": ["personal", "housing"] + }, + { + "id": "mem_143", + "content": "Watched a chess tournament stream; the world champion's endgame technique in the king-and-pawn endgame was surgical—every tempo precisely calculated", + "domain": "distractor", + "importance": 0.35, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "chess"] + }, + { + "id": "mem_144", + "content": "The cat knocked over the succulent arrangement on the bookshelf for the second time this week; need to find a higher shelf or a hanging planter", + "domain": "distractor", + "importance": 0.12, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "home", "pets"] + }, + { + "id": "mem_145", + "content": "Finished the online Spanish course level B1; the subjunctive mood is still tricky but the reading comprehension has improved noticeably over the past three months", + "domain": "distractor", + "importance": 0.35, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "language-learning"] + }, + { + "id": "mem_146", + "content": "The hiking trail was muddy after the rain but the views from the ridge were 
spectacular; took about four hours round trip including a thirty minute lunch stop", + "domain": "distractor", + "importance": 0.25, + "decay_factor": 0.07, + "memory_type": "episodic", + "tags": ["personal", "outdoor", "hiking"] + }, + { + "id": "mem_147", + "content": "Annealing in metallurgy heats a metal to just below its melting point and then cools it slowly, relieving internal stresses and improving ductility", + "domain": "physics", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "materials", "metallurgy"] + }, + { + "id": "mem_148", + "content": "The Silk Road connected Han Dynasty China to the Roman Empire via Central Asian oases, facilitating trade in silk, spices, and glassware across Eurasia", + "domain": "history", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "trade", "ancient"] + }, + { + "id": "mem_149", + "content": "Osmosis is the movement of water across a semipermeable membrane from a region of lower solute concentration to higher solute concentration", + "domain": "biology", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "cell", "transport"] + }, + { + "id": "mem_150", + "content": "Terroir refers to the complete natural environment—soil composition, microclimate, topography, and local microflora—that gives wines their distinctive regional character", + "domain": "cooking", + "importance": 0.70, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["cooking", "wine", "geography"] + }, + { + "id": "mem_151", + "content": "The Fourier transform decomposes a time-domain signal into its constituent frequencies, enabling spectral analysis and signal filtering operations", + "domain": "physics", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["physics", "math", "signal-processing"] + }, + { + "id": "mem_152", + "content": "Stock options give holders the 
right but not obligation to buy or sell shares at a fixed strike price before expiration, enabling leveraged speculation and hedging", + "domain": "economics", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "finance", "derivatives"] + }, + { + "id": "mem_153", + "content": "Conlang designers like Tolkien and Okrand create invented languages with consistent phonology, morphology, and syntax for fictional worlds or linguistic experiments", + "domain": "linguistics", + "importance": 0.68, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["linguistics", "conlangs"] + }, + { + "id": "mem_154", + "content": "The human gut microbiome contains trillions of bacteria that influence digestion, immune regulation, and possibly mood through the gut-brain axis", + "domain": "biology", + "importance": 0.82, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "microbiome", "health"] + }, + { + "id": "mem_155", + "content": "Colonoscopy uses a flexible camera to inspect the entire colon, detecting polyps that can be removed to prevent colorectal cancer progression", + "domain": "medicine", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "oncology", "screening"] + }, + { + "id": "mem_156", + "content": "The Hanging Gardens of Babylon, if they existed, were reportedly an ascending series of terraced gardens irrigated by pumps drawing from the Euphrates River", + "domain": "distractor", + "importance": 0.55, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["history", "ancient", "wonders"] + }, + { + "id": "mem_157", + "content": "Gradient boosting builds an ensemble of weak learners sequentially, with each tree fitting the residuals left by the previous ones to minimize a differentiable loss", + "domain": "ml", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "ensemble", "boosting"] + }, + { 
+ "id": "mem_158", + "content": "Diffusion models learn to reverse a gradual noise-addition process, generating high-quality images by iteratively denoising from pure Gaussian noise", + "domain": "ml", + "importance": 0.85, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["ml", "generative", "diffusion"] + }, + { + "id": "mem_159", + "content": "Quorum sensing allows bacteria to detect population density by accumulating signaling molecules and trigger coordinated gene expression at threshold concentrations", + "domain": "biology", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "microbiology", "gene-regulation"] + }, + { + "id": "mem_160", + "content": "Urban heat islands form where concrete and asphalt absorb and retain solar energy, raising city temperatures several degrees above surrounding rural areas", + "domain": "geography", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "urban", "climate"] + }, + { + "id": "mem_161", + "content": "Bought a new espresso machine with PID temperature control and a pressure profiling kit; dialing in the extraction for the Ethiopian natural process beans took about twenty shots", + "domain": "distractor", + "importance": 0.38, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "coffee", "hobby"] + }, + { + "id": "mem_162", + "content": "The mortgage refinancing closed at a lower rate than expected; the break-even on closing costs works out to about eighteen months of reduced payments", + "domain": "distractor", + "importance": 0.35, + "decay_factor": 0.05, + "memory_type": "episodic", + "tags": ["personal", "finance"] + }, + { + "id": "mem_163", + "content": "Formal verification uses mathematical proof techniques to guarantee that a program or hardware design satisfies specified correctness properties", + "domain": "programming", + "importance": 0.82, + "decay_factor": 0.02, + 
"memory_type": "semantic", + "tags": ["programming", "verification", "formal-methods"] + }, + { + "id": "mem_164", + "content": "Protein folding determines three-dimensional structure from the amino acid sequence; misfolding into beta-sheet aggregates causes diseases like Alzheimer's and Parkinson's", + "domain": "biology", + "importance": 0.85, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["biology", "proteins", "disease"] + }, + { + "id": "mem_165", + "content": "The Paris Agreement commits signatories to limit warming to 1.5 degrees Celsius above pre-industrial levels through nationally determined carbon reduction targets", + "domain": "geography", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "climate", "policy"] + }, + { + "id": "mem_166", + "content": "Microwave ovens heat food by exciting water molecules with 2.45 GHz electromagnetic radiation, causing rapid internal heating through dielectric absorption", + "domain": "distractor", + "importance": 0.48, + "decay_factor": 0.04, + "memory_type": "semantic", + "tags": ["physics", "cooking", "appliances"] + }, + { + "id": "mem_167", + "content": "Constitutional monarchies retain a hereditary monarch as ceremonial head of state while an elected parliament and prime minister hold actual governing power", + "domain": "history", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "government", "monarchy"] + }, + { + "id": "mem_168", + "content": "Seismographs detect and record ground motion from earthquakes by measuring relative displacement between a suspended mass and the moving earth frame", + "domain": "geography", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "geology", "seismology"] + }, + { + "id": "mem_169", + "content": "Central bank quantitative easing purchases government bonds to inject liquidity and suppress long-term interest rates when 
conventional monetary policy is constrained", + "domain": "economics", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "macroeconomics", "monetary-policy"] + }, + { + "id": "mem_170", + "content": "The smell of baking bread triggers strong autobiographical memories because the olfactory bulb connects directly to hippocampal memory circuits without thalamic relay", + "domain": "distractor", + "importance": 0.45, + "decay_factor": 0.04, + "memory_type": "semantic", + "tags": ["neuroscience", "memory", "smell"] + }, + { + "id": "mem_171", + "content": "Optogenetics uses genetically encoded light-sensitive proteins to activate or silence specific neurons with millisecond precision in behaving animals", + "domain": "medicine", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "neuroscience", "tools"] + }, + { + "id": "mem_172", + "content": "Diphthongs are complex vowel sounds where the tongue glides from one position to another within a single syllable, as in the English words bite and boat", + "domain": "linguistics", + "importance": 0.70, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["linguistics", "phonology", "vowels"] + }, + { + "id": "mem_173", + "content": "Arbitrage exploits price differences for the same asset across markets, and in efficient markets arbitrage opportunities are eliminated almost instantly", + "domain": "economics", + "importance": 0.78, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["economics", "finance", "markets"] + }, + { + "id": "mem_174", + "content": "The fermentation of milk by Lactobacillus acidophilus and other bacteria lowers pH through lactic acid production, thickening the milk and preserving it as yogurt", + "domain": "cooking", + "importance": 0.73, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "fermentation", "dairy"] + }, + { + "id": "mem_175", + "content": "Virtual 
memory allows processes to use more memory than physically installed by mapping pages to disk, with the operating system swapping pages on demand", + "domain": "programming", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "systems", "memory"] + }, + { + "id": "mem_176", + "content": "The decolonization movement after World War II led dozens of African and Asian nations to achieve independence from European colonial powers between 1945 and 1975", + "domain": "history", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "colonization", "20th-century"] + }, + { + "id": "mem_177", + "content": "Sleep is best right now and I am too tired to write anything coherent; this entry is just a placeholder to note that Tuesday was completely uneventful", + "domain": "distractor", + "importance": 0.10, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "daily"] + }, + { + "id": "mem_178", + "content": "Scoville scale measures capsaicin concentration in peppers by expressing the dilution factor required for a trained panel to detect no heat, ranging from zero for bell peppers to millions for Carolina Reapers", + "domain": "cooking", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["cooking", "peppers", "measurement"] + }, + { + "id": "mem_179", + "content": "Convolutional feature maps from early layers detect edges and textures while deeper layers respond to object parts and whole objects in computer vision models", + "domain": "ml", + "importance": 0.83, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "cnn", "interpretability"] + }, + { + "id": "mem_180", + "content": "Continental drift was first proposed by Alfred Wegener in 1912 based on the matching coastlines of Africa and South America and similar fossil assemblages on both continents", + "domain": "geography", + "importance": 0.78, + 
"decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "geology", "history-of-science"] + }, + { + "id": "mem_181", + "content": "Finally got the standing desk assembled; the cable management took longer than the actual desk but the result is much cleaner than the old setup", + "domain": "distractor", + "importance": 0.22, + "decay_factor": 0.08, + "memory_type": "episodic", + "tags": ["personal", "home", "workspace"] + }, + { + "id": "mem_182", + "content": "The team trivia night was won by the table in the back corner; the decisive question about 1980s pop music eliminated the engineering teams early", + "domain": "distractor", + "importance": 0.18, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "social"] + }, + { + "id": "mem_183", + "content": "Renewed the car registration at the DMV; the whole process took 45 minutes including the wait, which is much faster than last year", + "domain": "distractor", + "importance": 0.12, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "errands"] + }, + { + "id": "mem_184", + "content": "The neighbor's dog barked at 3am again; need to figure out whether to leave a polite note or just buy better earplugs", + "domain": "distractor", + "importance": 0.10, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "home"] + }, + { + "id": "mem_185", + "content": "Bought new running shoes; the extra cushioning in the heel is immediately noticeable and the wide toe box should help with the blister problem", + "domain": "distractor", + "importance": 0.25, + "decay_factor": 0.07, + "memory_type": "episodic", + "tags": ["personal", "fitness", "shopping"] + }, + { + "id": "mem_186", + "content": "The ferry crossing was delayed by 90 minutes due to fog; eventually cleared and the sunset views over the water made up for the wait", + "domain": "distractor", + "importance": 0.20, + "decay_factor": 0.08, + "memory_type": "episodic", + "tags": 
["personal", "travel"] + }, + { + "id": "mem_187", + "content": "Just discovered that the downstairs bathroom tap drips exactly 47 times per minute; it is oddly hypnotic and very annoying at midnight", + "domain": "distractor", + "importance": 0.08, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "home"] + }, + { + "id": "mem_188", + "content": "Left the umbrella on the train for the third time this year; the lost property office said to check back in a week if nobody claims it", + "domain": "distractor", + "importance": 0.10, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "errands"] + }, + { + "id": "mem_189", + "content": "The hardware store was out of the specific screw size needed; ended up ordering online and the shipment arrives Thursday, delaying the shelf project", + "domain": "distractor", + "importance": 0.12, + "decay_factor": 0.10, + "memory_type": "episodic", + "tags": ["personal", "home", "diy"] + }, + { + "id": "mem_190", + "content": "Noticed the pigeon that has been nesting on the air conditioning unit outside is now raising two chicks; they are surprisingly loud for such small birds", + "domain": "distractor", + "importance": 0.18, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "nature"] + }, + { + "id": "mem_191", + "content": "Switched from manual to electric toothbrush after the dentist's suggestion; the timer feature is useful but the charging dock takes up too much counter space", + "domain": "distractor", + "importance": 0.14, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "health-routine"] + }, + { + "id": "mem_192", + "content": "Finished assembling the flat-pack bookcase; the instructions included one completely ambiguous diagram that required three restarts to decipher correctly", + "domain": "distractor", + "importance": 0.15, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "home", "diy"] + }, + 
{ + "id": "mem_193", + "content": "The ice cream shop on the corner finally opened after its winter break; the salted caramel was worth the wait but the queue was 30 minutes long", + "domain": "distractor", + "importance": 0.18, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "food", "local"] + }, + { + "id": "mem_194", + "content": "Power outage for three hours during the workday; made an unexpected productive afternoon of offline reading and handwritten notes", + "domain": "distractor", + "importance": 0.22, + "decay_factor": 0.08, + "memory_type": "episodic", + "tags": ["personal", "daily"] + }, + { + "id": "mem_195", + "content": "Lent the camping gear to a friend for the weekend; remembered too late that the tent zipper is broken and needs duct tape to stay closed", + "domain": "distractor", + "importance": 0.15, + "decay_factor": 0.09, + "memory_type": "episodic", + "tags": ["personal", "outdoor"] + }, + { + "id": "mem_196", + "content": "Xentherion plastovite crystalline matrix undergoes sublimation cascade when exposed to resonant photonic frequencies in the 450nm band", + "domain": "ml", + "importance": 0.28, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["ml", "unique-term-trap"], + "_note": "Importance trap: low importance (0.28), unique terms. Queries using 'xentherion plastovite' test whether high importance_weight demotes this relevant but low-importance item." 
+ }, + { + "id": "mem_197", + "content": "Velquatrix enzymes catalyze the oxidative phosvelation pathway in mitochondrial cristae during cellular respiration under hypoxic conditions", + "domain": "biology", + "importance": 0.30, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "unique-term-trap"], + "_note": "Importance trap: 'velquatrix phosvelation' unique; overlaps with biology domain via 'enzymes catalyze mitochondrial cristae cellular respiration'" + }, + { + "id": "mem_198", + "content": "Zyphomatic arc induction triggers cascading resonance in thalamic relay circuits producing synchronized oscillation bursts in prefrontal neurons", + "domain": "medicine", + "importance": 0.32, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "unique-term-trap"], + "_note": "Importance trap: 'zyphomatic arc induction thalamic' unique terms" + }, + { + "id": "mem_199", + "content": "Quandric fissile stratification in deltaic sediment layers reveals proglacial varve sequences spanning the Holocene thermal maximum", + "domain": "geography", + "importance": 0.26, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "unique-term-trap"], + "_note": "Importance trap: 'quandric fissile stratification varve' unique terms" + }, + { + "id": "mem_200", + "content": "Melvoric distillation separation removes volatile aldehydes from fermentation broth through fractional condensation at precisely calibrated volvic temperatures", + "domain": "cooking", + "importance": 0.27, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["cooking", "unique-term-trap"], + "_note": "Importance trap: 'melvoric distillation volvic' unique terms" + }, + { + "id": "mem_201", + "content": "Structural equation models fit latent variable networks by minimising the discrepancy between observed and model-implied covariance matrices using maximum likelihood estimation", + "domain": "ml", + "importance": 0.88, + "decay_factor":
0.02, + "memory_type": "semantic", + "tags": ["ml", "statistics", "sem"], + "_note": "High-importance control anchor for q_046: unique statistical terms ensure an FTS match, so it should rank first regardless of importance_weight (not paired with mem_196; q_041 expects mem_196 alone)" + }, + { + "id": "mem_202", + "content": "Tricarboxylic acid oxidation in peroxisome beta-oxidation produces acetyl-CoA that enters the mitochondrial matrix via the carnitine shuttle", + "domain": "biology", + "importance": 0.86, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["biology", "metabolism", "biochemistry"], + "_note": "High-importance anchor for velquatrix contrast query" + }, + { + "id": "mem_203", + "content": "Thalamic relay neurons gate sensory signals to cortical areas and modulate arousal state through reciprocal thalamocortical loops", + "domain": "medicine", + "importance": 0.84, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["medicine", "neuroscience", "thalamus"], + "_note": "High-importance anchor for zyphomatic contrast query" + } + ], + "eval_queries": [ + { + "id": "q_001", + "query": "deep learning weight adjustment technique", + "expected_top_k": ["mem_001", "mem_002", "mem_003"], + "expected_excluded": ["mem_111", "mem_112", "mem_113"], + "query_type": "synonym", + "notes": "backpropagation+gradient descent using synonym: 'weight adjustment technique' vs 'backpropagation/gradient descent'" + }, + { + "id": "q_002", + "query": "sequence to sequence memory cell gating", + "expected_top_k": ["mem_010", "mem_009"], + "expected_excluded": ["mem_111", "mem_128"], + "query_type": "synonym", + "notes": "LSTM/RNN using synonyms: 'memory cell gating' vs 'long short-term memory gating mechanisms'" + }, + { + "id": "q_003", + "query": "self-attention parallelizable architecture language model", + "expected_top_k": ["mem_004", "mem_005"], + "expected_excluded": ["mem_009", "mem_010"], + "query_type": "partial", + "notes": "Transformer attention — partial match, 'self-attention' not in
mem_004 verbatim (mem_005 contains 'multi-head self-attention')" + }, + { + "id": "q_004", + "query": "regularization prevent overfitting randomly zero activations", + "expected_top_k": ["mem_006"], + "expected_excluded": ["mem_007", "mem_093"], + "query_type": "partial", + "notes": "Dropout — partial description without the word 'dropout'" + }, + { + "id": "q_005", + "query": "pretrained model adaptation new task", + "expected_top_k": ["mem_011"], + "expected_excluded": ["mem_012", "mem_015"], + "query_type": "synonym", + "notes": "'pretrained model adaptation' vs 'transfer learning fine-tuning'" + }, + { + "id": "q_006", + "query": "ensemble of shallow trees residual fitting sequential boosting", + "expected_top_k": ["mem_157", "mem_096"], + "expected_excluded": ["mem_001", "mem_002"], + "query_type": "synonym", + "notes": "'ensemble of shallow trees residual fitting' vs 'gradient boosting decision trees'" + }, + { + "id": "q_007", + "query": "denoising score matching generative image synthesis", + "expected_top_k": ["mem_158"], + "expected_excluded": ["mem_097", "mem_006"], + "query_type": "synonym", + "notes": "'denoising score matching' vs 'diffusion models'; generative is present in both" + }, + { + "id": "q_008", + "query": "cell energy currency production organelle", + "expected_top_k": ["mem_016", "mem_017"], + "expected_excluded": ["mem_020", "mem_025"], + "query_type": "synonym", + "notes": "'cell energy currency' = ATP = mitochondria; 'organelle' present; 'cellular respiration' = metabolism" + }, + { + "id": "q_009", + "query": "hereditary trait selection reproductive advantage", + "expected_top_k": ["mem_019"], + "expected_excluded": ["mem_024", "mem_025"], + "query_type": "partial", + "notes": "Natural selection described without using those words" + }, + { + "id": "q_010", + "query": "protein blueprint transported from nucleus to ribosome", + "expected_top_k": ["mem_018"], + "expected_excluded": ["mem_020", "mem_023"], + "query_type": "synonym", + "notes": "'protein blueprint' = mRNA;
'transported from nucleus' = transcription + export" + }, + { + "id": "q_011", + "query": "biological catalyst lowers activation barrier chemical transformation", + "expected_top_k": ["mem_020"], + "expected_excluded": ["mem_016", "mem_017"], + "query_type": "synonym", + "notes": "'biological catalyst lowers activation barrier' vs 'enzyme lowers activation energy'" + }, + { + "id": "q_012", + "query": "genome sequence base pairs chromosome count", + "expected_top_k": ["mem_023"], + "expected_excluded": ["mem_018", "mem_024"], + "query_type": "partial", + "notes": "Partial match: 'base pairs' and 'chromosome' present, 'genome sequence' not verbatim in mem_023" + }, + { + "id": "q_013", + "query": "cooperative multitasking lightweight concurrency event loop", + "expected_top_k": ["mem_027"], + "expected_excluded": ["mem_030", "mem_033"], + "query_type": "synonym", + "notes": "'cooperative multitasking lightweight concurrency' vs 'async runtime Tokio tasks'" + }, + { + "id": "q_014", + "query": "heap memory reclamation unreachable objects automatic", + "expected_top_k": ["mem_031"], + "expected_excluded": ["mem_026", "mem_175"], + "query_type": "synonym", + "notes": "'heap memory reclamation unreachable objects' vs 'garbage collection'" + }, + { + "id": "q_015", + "query": "distributed log replication leader election fault tolerance", + "expected_top_k": ["mem_035"], + "expected_excluded": ["mem_036", "mem_030"], + "query_type": "partial", + "notes": "Raft consensus — 'leader election' and 'log replication' present without saying 'Raft'" + }, + { + "id": "q_016", + "query": "code correctness mathematical proof guarantees", + "expected_top_k": ["mem_163"], + "expected_excluded": ["mem_032", "mem_028"], + "query_type": "synonym", + "notes": "'mathematical proof guarantees' vs 'formal verification'; adjacent programming domain" + }, + { + "id": "q_017", + "query": "printing technology mass text reproduction reformation", + "expected_top_k": ["mem_039"], + 
"expected_excluded": ["mem_038", "mem_040"], + "query_type": "partial", + "notes": "Gutenberg printing press — 'mass text reproduction' is partial synonym" + }, + { + "id": "q_018", + "query": "superpower nuclear standoff proxy conflict rivalry", + "expected_top_k": ["mem_043"], + "expected_excluded": ["mem_044", "mem_040"], + "query_type": "synonym", + "notes": "'superpower nuclear standoff proxy conflict' vs 'Cold War US-USSR nuclear deterrence proxy wars'" + }, + { + "id": "q_019", + "query": "Japan feudalism abolition western industrialization", + "expected_top_k": ["mem_042"], + "expected_excluded": ["mem_038", "mem_043"], + "query_type": "partial", + "notes": "Meiji Restoration — 'feudalism abolition western industrialization' partially describes it" + }, + { + "id": "q_020", + "query": "ancient trade network East West silk spices cultural exchange", + "expected_top_k": ["mem_148"], + "expected_excluded": ["mem_041", "mem_104"], + "query_type": "partial", + "notes": "'trade network East West silk spices' partially matches Silk Road mem_148" + }, + { + "id": "q_021", + "query": "seabed spreading continental movement age", + "expected_top_k": ["mem_048", "mem_180"], + "expected_excluded": ["mem_049", "mem_137"], + "query_type": "synonym", + "notes": "'seabed spreading continental movement' = tectonic plates; Wegener's continental drift" + }, + { + "id": "q_022", + "query": "tropical forest biodiversity river basin South America", + "expected_top_k": ["mem_047"], + "expected_excluded": ["mem_046", "mem_052"], + "query_type": "partial", + "notes": "'tropical forest biodiversity river basin' — Amazon rainforest, partial match" + }, + { + "id": "q_023", + "query": "climate policy carbon emission reduction international agreement", + "expected_top_k": ["mem_165"], + "expected_excluded": ["mem_051", "mem_160"], + "query_type": "synonym", + "notes": "'international agreement carbon emission reduction' vs 'Paris Agreement nationally determined targets'" + }, + { + 
"id": "q_024", + "query": "browning reaction amino acid sugar heat flavor", + "expected_top_k": ["mem_054"], + "expected_excluded": ["mem_057", "mem_055"], + "query_type": "partial", + "notes": "Maillard reaction: 'browning reaction amino acid sugar' is partial description without the word 'Maillard'" + }, + { + "id": "q_025", + "query": "microorganism sugar conversion fermented food preservation", + "expected_top_k": ["mem_055", "mem_174"], + "expected_excluded": ["mem_054", "mem_063"], + "query_type": "synonym", + "notes": "'microorganism sugar conversion fermented food' vs 'bacteria yeast fermentation'; mem_174 is yogurt fermentation" + }, + { + "id": "q_026", + "query": "savory fifth taste receptor glutamate mushroom soy", + "expected_top_k": ["mem_058"], + "expected_excluded": ["mem_054", "mem_062"], + "query_type": "synonym", + "notes": "'savory fifth taste glutamate mushroom soy' vs 'umami'; no 'umami' in query" + }, + { + "id": "q_027", + "query": "precise temperature water bath cooking uniform doneness", + "expected_top_k": ["mem_105"], + "expected_excluded": ["mem_059", "mem_054"], + "query_type": "synonym", + "notes": "'precise temperature water bath' vs 'sous vide'; no 'sous vide' in query" + }, + { + "id": "q_028", + "query": "cholesterol lowering drug enzyme inhibitor heart disease", + "expected_top_k": ["mem_065"], + "expected_excluded": ["mem_064", "mem_066"], + "query_type": "synonym", + "notes": "'cholesterol lowering drug enzyme inhibitor' vs 'statin HMG-CoA reductase inhibitor'" + }, + { + "id": "q_029", + "query": "bacterial drug resistance mutation treatment failure", + "expected_top_k": ["mem_066"], + "expected_excluded": ["mem_067", "mem_107"], + "query_type": "partial", + "notes": "'drug resistance mutation treatment failure' vs 'antibiotic resistance'; 'antibiotic' not in query" + }, + { + "id": "q_030", + "query": "population immunity threshold pathogen spread prevention", + "expected_top_k": ["mem_067"], + "expected_excluded": 
["mem_066", "mem_136"], + "query_type": "synonym", + "notes": "'population immunity threshold' vs 'herd immunity'; no 'herd' in query" + }, + { + "id": "q_031", + "query": "brain synaptic strengthening weakening learning cellular basis", + "expected_top_k": ["mem_071"], + "expected_excluded": ["mem_072", "mem_131"], + "query_type": "synonym", + "notes": "'synaptic strengthening weakening' = long-term potentiation/depression; 'learning cellular basis' matches" + }, + { + "id": "q_032", + "query": "zero electrical resistance cooled material", + "expected_top_k": ["mem_078"], + "expected_excluded": ["mem_074", "mem_075"], + "query_type": "synonym", + "notes": "'zero electrical resistance cooled material' vs 'superconductivity below critical temperature'" + }, + { + "id": "q_033", + "query": "light particle wave duality quantum observation", + "expected_top_k": ["mem_076", "mem_135"], + "expected_excluded": ["mem_075", "mem_077"], + "query_type": "partial", + "notes": "Wave-particle duality + photoelectric effect; 'light particle wave' partially matches both" + }, + { + "id": "q_034", + "query": "price level rise purchasing power currency erosion", + "expected_top_k": ["mem_081"], + "expected_excluded": ["mem_080", "mem_133"], + "query_type": "synonym", + "notes": "'price level rise purchasing power erosion' vs 'inflation erodes purchasing power'" + }, + { + "id": "q_035", + "query": "strategic interaction rational agents optimal response", + "expected_top_k": ["mem_085", "mem_106"], + "expected_excluded": ["mem_080", "mem_082"], + "query_type": "synonym", + "notes": "'strategic interaction rational agents optimal response' = game theory + Nash equilibrium" + }, + { + "id": "q_036", + "query": "international trade specialization opportunity cost efficiency", + "expected_top_k": ["mem_082"], + "expected_excluded": ["mem_139", "mem_083"], + "query_type": "partial", + "notes": "Comparative advantage: 'specialization opportunity cost' is partial description" + }, + { + 
"id": "q_037", + "query": "sound meaning contrastive unit spoken language", + "expected_top_k": ["mem_086"], + "expected_excluded": ["mem_087", "mem_172"], + "query_type": "synonym", + "notes": "'sound meaning contrastive unit' vs 'phoneme'; 'spoken language' in query but not verbatim in mem_086" + }, + { + "id": "q_038", + "query": "children learn first language speech stages critical window", + "expected_top_k": ["mem_089"], + "expected_excluded": ["mem_088", "mem_090"], + "query_type": "partial", + "notes": "Language acquisition: 'speech stages critical window' partially matches 'critical period'" + }, + { + "id": "q_039", + "query": "food taste region soil microclimate wine character", + "expected_top_k": ["mem_150"], + "expected_excluded": ["mem_058", "mem_062"], + "query_type": "synonym", + "notes": "'food region soil microclimate character' vs 'terroir'; 'wine' is shared but 'terroir' not in query" + }, + { + "id": "q_040", + "query": "protein three-dimensional shape amino acid sequence misfold aggregation disease", + "expected_top_k": ["mem_164"], + "expected_excluded": ["mem_018", "mem_020"], + "query_type": "partial", + "notes": "Protein folding/misfolding: 'three-dimensional shape amino acid sequence misfold aggregation' partial match" + }, + { + "id": "q_041", + "query": "xentherion plastovite crystalline matrix photonic", + "expected_top_k": ["mem_196"], + "expected_excluded": ["mem_111", "mem_112"], + "query_type": "importance_trap", + "notes": "Unique-term query matching only mem_196 (imp=0.28). High importance_weight demotes this low-importance item, hurting MRR. Tests importance_weight discrimination." + }, + { + "id": "q_042", + "query": "velquatrix enzymes phosvelation mitochondrial cristae hypoxic", + "expected_top_k": ["mem_197"], + "expected_excluded": ["mem_016", "mem_020"], + "query_type": "importance_trap", + "notes": "Unique-term query matching only mem_197 (imp=0.30). 
Tests importance_weight — low importance_weight prefers FTS relevance and finds this; high importance_weight ranks it low." + }, + { + "id": "q_043", + "query": "zyphomatic arc induction thalamic resonance oscillation", + "expected_top_k": ["mem_198", "mem_203"], + "expected_excluded": ["mem_111", "mem_112"], + "query_type": "importance_trap", + "notes": "mem_198 (imp=0.32) and mem_203 (imp=0.84). High importance_weight promotes mem_203 to rank 1 and may push mem_198 below limit. Discriminates importance_weight." + }, + { + "id": "q_044", + "query": "quandric fissile stratification varve proglacial", + "expected_top_k": ["mem_199"], + "expected_excluded": ["mem_046", "mem_048"], + "query_type": "importance_trap", + "notes": "Unique-term query matching only mem_199 (imp=0.26). Pure importance_weight discrimination." + }, + { + "id": "q_045", + "query": "melvoric distillation volvic fermentation aldehydes fractional", + "expected_top_k": ["mem_200"], + "expected_excluded": ["mem_054", "mem_055"], + "query_type": "importance_trap", + "notes": "Unique-term query matching only mem_200 (imp=0.27). Tests importance_weight on low-salience cooking memory." + }, + { + "id": "q_046", + "query": "covariance matrix maximum likelihood latent variable structural", + "expected_top_k": ["mem_201"], + "expected_excluded": ["mem_094", "mem_095"], + "query_type": "importance_trap", + "notes": "High-importance mem_201 (imp=0.88) — unique terms ensure FTS match. Should rank first regardless of importance_weight. Serves as a control query." + }, + { + "id": "q_047", + "query": "carnitine shuttle peroxisome beta-oxidation acetyl-CoA tricarboxylic", + "expected_top_k": ["mem_202"], + "expected_excluded": ["mem_016", "mem_017"], + "query_type": "importance_trap", + "notes": "High-importance mem_202 (imp=0.86) — unique biochemistry terms. Control query: should rank first regardless of importance_weight." 
+ }, + { + "id": "q_048", + "query": "thalamic relay neurons sensory cortical arousal thalamocortical", + "expected_top_k": ["mem_203"], + "expected_excluded": ["mem_198", "mem_071"], + "query_type": "importance_trap", + "notes": "High-importance mem_203 (imp=0.84) with 'thalamic' shared with mem_198 (imp=0.32). Tests ranking when both match. High importance_weight should strongly prefer mem_203." + } + ] +} diff --git a/tests/khive-contract/tests/test_eval_corpus.py b/tests/khive-contract/tests/test_eval_corpus.py new file mode 100644 index 00000000..8ebb7d27 --- /dev/null +++ b/tests/khive-contract/tests/test_eval_corpus.py @@ -0,0 +1,252 @@ +"""Regression tests for memories_corpus_v2.json eval corpus integrity. + +These tests verify: + 1. Schema correctness — every query.expected_top_k references a real memory ID. + 2. Query type coverage — each type appears at least N times. + 3. Distractor count — documented and within acceptable range. + 4. Domain coverage — at least 5 distinct domains are present. + 5. Memory ID uniqueness — no duplicate corpus IDs. + 6. expected_excluded integrity — all excluded IDs also reference real memories. + +These are pure data tests; they require no MCP server or binary. 
+""" + +from __future__ import annotations + +import json +from collections import Counter +from pathlib import Path + +import pytest + +_HERE = Path(__file__).parent +_FIXTURES = _HERE.parent / "fixtures" +_V2_CORPUS = _FIXTURES / "memories_corpus_v2.json" + +# Minimum occurrences required for each query_type in the corpus +_MIN_QUERY_TYPE_COUNT = { + "synonym": 5, + "partial": 5, + "importance_trap": 4, +} + +# Acceptable distractor ratio range [min_fraction, max_fraction] +_DISTRACTOR_RATIO_MIN = 0.20 +_DISTRACTOR_RATIO_MAX = 0.50 + +# Minimum number of distinct non-distractor domains +_MIN_DOMAINS = 5 + +# Minimum total memories and queries +_MIN_MEMORIES = 150 +_MIN_QUERIES = 40 + + +@pytest.fixture(scope="module") +def corpus_data() -> dict: + if not _V2_CORPUS.exists(): + pytest.skip(f"v2 corpus not found at {_V2_CORPUS}") + return json.loads(_V2_CORPUS.read_text()) + + +@pytest.fixture(scope="module") +def memory_ids(corpus_data: dict) -> set[str]: + return {m["id"] for m in corpus_data["memories"]} + + +@pytest.fixture(scope="module") +def memories(corpus_data: dict) -> list[dict]: + return corpus_data["memories"] + + +@pytest.fixture(scope="module") +def eval_queries(corpus_data: dict) -> list[dict]: + return corpus_data["eval_queries"] + + +# --------------------------------------------------------------------------- +# Scale tests +# --------------------------------------------------------------------------- + + +def test_memory_count(memories: list[dict]) -> None: + """Corpus must have at least MIN_MEMORIES memories.""" + assert len(memories) >= _MIN_MEMORIES, ( + f"Expected at least {_MIN_MEMORIES} memories, got {len(memories)}" + ) + + +def test_query_count(eval_queries: list[dict]) -> None: + """Corpus must have at least MIN_QUERIES eval queries.""" + assert len(eval_queries) >= _MIN_QUERIES, ( + f"Expected at least {_MIN_QUERIES} eval queries, got {len(eval_queries)}" + ) + + +# 
--------------------------------------------------------------------------- +# Memory ID integrity +# --------------------------------------------------------------------------- + + +def test_memory_ids_unique(memories: list[dict]) -> None: + """All memory IDs must be unique.""" + ids = [m["id"] for m in memories] + duplicates = [mid for mid, count in Counter(ids).items() if count > 1] + assert not duplicates, f"Duplicate memory IDs found: {duplicates}" + + +def test_memory_ids_have_id_field(memories: list[dict]) -> None: + """Every memory must have an 'id' field.""" + missing = [i for i, m in enumerate(memories) if "id" not in m] + assert not missing, f"Memories at indices {missing} are missing 'id' field" + + +def test_memory_required_fields(memories: list[dict]) -> None: + """Every memory must have the required fields.""" + required = {"id", "content", "importance", "decay_factor", "memory_type"} + for m in memories: + missing = required - m.keys() + assert not missing, f"Memory {m.get('id', '?')} missing fields: {missing}" + + +# --------------------------------------------------------------------------- +# Query schema tests +# --------------------------------------------------------------------------- + + +def test_query_ids_unique(eval_queries: list[dict]) -> None: + """All query IDs must be unique.""" + ids = [q["id"] for q in eval_queries] + duplicates = [qid for qid, count in Counter(ids).items() if count > 1] + assert not duplicates, f"Duplicate query IDs: {duplicates}" + + +def test_queries_have_expected_top_k(eval_queries: list[dict]) -> None: + """Every eval query must have expected_top_k (v2 schema marker).""" + missing = [q.get("id", i) for i, q in enumerate(eval_queries) if "expected_top_k" not in q] + assert not missing, f"Queries missing expected_top_k: {missing}" + + +def test_queries_have_query_type(eval_queries: list[dict]) -> None: + """Every eval query must have a query_type field.""" + missing = [q.get("id", i) for i, q in 
enumerate(eval_queries) if "query_type" not in q] + assert not missing, f"Queries missing query_type: {missing}" + + +# --------------------------------------------------------------------------- +# Cross-reference integrity +# --------------------------------------------------------------------------- + + +def test_expected_top_k_references_real_ids( + eval_queries: list[dict], memory_ids: set[str] +) -> None: + """Every ID in expected_top_k must correspond to a real memory.""" + errors: list[str] = [] + for q in eval_queries: + for mid in q.get("expected_top_k", []): + if mid not in memory_ids: + errors.append(f"Query {q['id']}: expected_top_k references unknown memory {mid!r}") + assert not errors, "\n".join(errors) + + +def test_expected_excluded_references_real_ids( + eval_queries: list[dict], memory_ids: set[str] +) -> None: + """Every ID in expected_excluded must correspond to a real memory.""" + errors: list[str] = [] + for q in eval_queries: + for mid in q.get("expected_excluded", []): + if mid not in memory_ids: + errors.append( + f"Query {q['id']}: expected_excluded references unknown memory {mid!r}" + ) + assert not errors, "\n".join(errors) + + +def test_no_overlap_between_top_k_and_excluded(eval_queries: list[dict]) -> None: + """A memory cannot appear in both expected_top_k and expected_excluded.""" + errors: list[str] = [] + for q in eval_queries: + top_k = set(q.get("expected_top_k", [])) + excluded = set(q.get("expected_excluded", [])) + overlap = top_k & excluded + if overlap: + errors.append(f"Query {q['id']}: IDs in both top_k and excluded: {overlap}") + assert not errors, "\n".join(errors) + + +# --------------------------------------------------------------------------- +# Query type coverage +# --------------------------------------------------------------------------- + + +def test_query_type_minimum_counts(eval_queries: list[dict]) -> None: + """Each required query_type must appear at least MIN_QUERY_TYPE_COUNT times.""" + type_counts = 
Counter(q.get("query_type", "unknown") for q in eval_queries) + errors: list[str] = [] + for qtype, min_count in _MIN_QUERY_TYPE_COUNT.items(): + actual = type_counts.get(qtype, 0) + if actual < min_count: + errors.append( + f"query_type={qtype!r}: expected >= {min_count} queries, got {actual}" + ) + assert not errors, "\n".join(errors) + + +def test_query_type_distribution(eval_queries: list[dict]) -> None: + """Report query type distribution (informational — never fails).""" + type_counts = Counter(q.get("query_type", "unknown") for q in eval_queries) + # Just verify types are a non-empty set — actual values tested above + assert len(type_counts) >= 1 + + +# --------------------------------------------------------------------------- +# Distractor ratio +# --------------------------------------------------------------------------- + + +def test_distractor_ratio(memories: list[dict]) -> None: + """Distractor memories should comprise between MIN and MAX of the corpus.""" + distractors = [m for m in memories if m.get("domain") == "distractor"] + ratio = len(distractors) / len(memories) + assert _DISTRACTOR_RATIO_MIN <= ratio <= _DISTRACTOR_RATIO_MAX, ( + f"Distractor ratio {ratio:.2%} outside expected range " + f"[{_DISTRACTOR_RATIO_MIN:.0%}, {_DISTRACTOR_RATIO_MAX:.0%}]. " + f"Got {len(distractors)} distractors out of {len(memories)} memories." 
+ ) + + +# --------------------------------------------------------------------------- +# Domain coverage +# --------------------------------------------------------------------------- + + +def test_domain_coverage(memories: list[dict]) -> None: + """At least MIN_DOMAINS distinct non-distractor domains must be present.""" + non_distractor_domains = { + m.get("domain", "unknown") + for m in memories + if m.get("domain") != "distractor" + } + assert len(non_distractor_domains) >= _MIN_DOMAINS, ( + f"Expected at least {_MIN_DOMAINS} distinct domains, " + f"found {len(non_distractor_domains)}: {sorted(non_distractor_domains)}" + ) + + +def test_domain_field_present(memories: list[dict]) -> None: + """Every memory should have a 'domain' field (informational integrity check).""" + missing = [m["id"] for m in memories if "domain" not in m] + assert not missing, f"Memories missing 'domain' field: {missing}" + + +# --------------------------------------------------------------------------- +# expected_top_k non-empty +# --------------------------------------------------------------------------- + + +def test_expected_top_k_non_empty(eval_queries: list[dict]) -> None: + """Every query must have at least one expected_top_k entry.""" + empty = [q["id"] for q in eval_queries if not q.get("expected_top_k")] + assert not empty, f"Queries with empty expected_top_k: {empty}" diff --git a/tests/khive-contract/tune/REPORT-v2.md b/tests/khive-contract/tune/REPORT-v2.md new file mode 100644 index 00000000..756f8ad0 --- /dev/null +++ b/tests/khive-contract/tune/REPORT-v2.md @@ -0,0 +1,76 @@ +# Param-Tuning Grid Search Report + +- **Date**: 2026-05-25 +- **Corpus version**: v2 +- **Grid size**: 232 configs +- **Eval queries**: 48 +- **Total runtime**: 6.4s +- **Mode**: FTS-only (no_embed=True) + +## Winning Config (highest combined_score) + +| Metric | Value | +|--------|-------| +| combined_score | 0.1302 | +| mrr_expected | 0.1667 | +| precision_at_k | 0.1562 | +| exclusion_penalty | 
0.0000 | +| recall_at_10 | 0.1562 | +| mrr (v1) | 0.1667 | +| mean latency | 0.5ms | +| config_index | 0 | + +Parameters: `rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=20) decay=exponential hl=14.0 ms=0.0` + +## Default vs Tuned Comparison + +| Metric | Default config | Tuned config | Delta | +|--------|---------------|-------------|-------| +| combined_score | 0.1302 | 0.1302 | +0.0000 | +| mrr_expected | 0.1667 | 0.1667 | +0.0000 | +| precision_at_k | 0.1562 | 0.1562 | +0.0000 | +| exclusion_penalty | 0.0000 | 0.0000 | +0.0000 | +| recall_at_10 | 0.1562 | 0.1562 | +0.0000 | +| mean latency | 0.6ms | 0.5ms | -0.0ms | + +Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 + +## Discrimination Analysis (v2 corpus) + +| Metric | Distinct values | Min | Max | Range | +|--------|-----------------|-----|-----|-------| +| combined_score | 2 | 0.0500 | 0.1302 | 0.0802 | +| mrr_expected | 2 | 0.0625 | 0.1667 | 0.1042 | +| precision_at_k | 2 | 0.0625 | 0.1562 | 0.0937 | + +A non-flat landscape requires combined_score range > 0.05 across configs. 
+ +## Top 10 by combined_score + +| idx | combined | mrr_exp | prec@k | excl_pen | latency | config | +|-----|---------|---------|--------|----------|---------|--------| +| 0 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.5ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=20) decay=exponential hl=14.0 ms=0.0 | +| 1 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=60) decay=exponential hl=30.0 ms=0.0 | +| 2 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=100) decay=exponential hl=60.0 ms=0.0 | +| 3 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0 ms=0.0 | +| 4 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.75/0.25) decay=hyperbolic hl=30.0 ms=0.0 | +| 5 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.5ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.5/0.5) decay=hyperbolic hl=60.0 ms=0.0 | +| 6 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.25/0.75) decay=none hl=14.0 ms=0.0 | +| 7 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.0/1.0) decay=none hl=30.0 ms=0.0 | +| 8 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=rrf(k=20) decay=none hl=60.0 ms=0.0 | +| 9 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=rrf(k=100) decay=exponential hl=14.0 ms=0.0 | + +## Top 10 by MRR + +| idx | combined | mrr_exp | prec@k | excl_pen | latency | config | +|-----|---------|---------|--------|----------|---------|--------| +| 0 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.5ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=20) decay=exponential hl=14.0 ms=0.0 | +| 1 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=60) decay=exponential hl=30.0 ms=0.0 | +| 2 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | 
rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=rrf(k=100) decay=exponential hl=60.0 ms=0.0 | +| 3 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0 ms=0.0 | +| 4 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.75/0.25) decay=hyperbolic hl=30.0 ms=0.0 | +| 5 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.5ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.5/0.5) decay=hyperbolic hl=60.0 ms=0.0 | +| 6 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.25/0.75) decay=none hl=14.0 ms=0.0 | +| 7 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.0/1.0) decay=none hl=30.0 ms=0.0 | +| 8 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=rrf(k=20) decay=none hl=60.0 ms=0.0 | +| 9 | 0.1302 | 0.1667 | 0.1562 | 0.0000 | 0.6ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=rrf(k=100) decay=exponential hl=14.0 ms=0.0 | diff --git a/tests/khive-contract/tune/grid_search.py b/tests/khive-contract/tune/grid_search.py index b7e64a33..d7ebedf1 100644 --- a/tests/khive-contract/tune/grid_search.py +++ b/tests/khive-contract/tune/grid_search.py @@ -4,6 +4,25 @@ strategies, decay models, and temporal half-life parameters. One MCP session is created and the corpus is loaded once; config is varied per recall() call. +Supports two corpus schemas: + v1 (memories_corpus.json): eval_queries use ``relevant_indices`` (int list) + v2 (memories_corpus_v2.json): eval_queries use ``expected_top_k`` (corpus ID + strings like "mem_001") and optional ``expected_excluded``. + +Discriminating metrics (v2 corpus): + MRR_expected: mean reciprocal rank of the FIRST expected_top_k hit in results. + Sensitive to ranking order — a hit at rank 1 scores 1.0, rank 5 scores 0.2. + precision_at_k: fraction of expected_top_k that appear in the top-k results, + where k = len(expected_top_k). 
Sensitive to candidate pool and score weights. + exclusion_penalty: fraction of expected_excluded items that appear in top-10, + penalising configs that surface distractors. Subtracted from final score. + +Combined discriminating score = 0.5 * MRR_expected + 0.3 * precision_at_k + - 0.2 * exclusion_penalty + +The v1 recall_at_10 metric is retained for backwards compatibility when a v1 +corpus is loaded. + TODO: Add --with-embed flag for embedding-enabled grid over both all-minilm-l6-v2 and paraphrase-multilingual-minilm-l12-v2 models. Requires no_embed=False and KHIVE_ADDITIONAL_EMBEDDING_MODELS=paraphrase. @@ -26,18 +45,36 @@ DEFAULT_CORPUS = _HERE.parent / "fixtures" / "memories_corpus.json" DEFAULT_OUTPUT = _HERE +# Weight constants for the combined discriminating score (v2 only) +_MRR_WEIGHT = 0.5 +_PREC_WEIGHT = 0.3 +_EXCL_WEIGHT = 0.2 + # --------------------------------------------------------------------------- # Data loading # --------------------------------------------------------------------------- -def load_corpus(path: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: - """Load memories and eval_queries from a corpus JSON file.""" +def _detect_corpus_version(data: dict[str, Any]) -> str: + """Return "v2" if any eval query uses expected_top_k, else "v1".""" + for eq in data.get("eval_queries", []): + if "expected_top_k" in eq: + return "v2" + return "v1" + + +def load_corpus(path: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]], str]: + """Load memories and eval_queries from a corpus JSON file. + + Returns: + (memories, eval_queries, version) where version is "v1" or "v2". 
+ """ data = json.loads(path.read_text()) memories: list[dict[str, Any]] = data["memories"] eval_queries: list[dict[str, Any]] = data["eval_queries"] - return memories, eval_queries + version = _detect_corpus_version(data) + return memories, eval_queries, version # --------------------------------------------------------------------------- @@ -46,15 +83,18 @@ def load_corpus(path: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]]] def setup_session( - memories: list[dict[str, Any]], db: str = ":memory:" -) -> tuple[KhiveMcpSession, dict[int, str]]: + memories: list[dict[str, Any]], db: str = ":memory:", *, version: str = "v1" +) -> tuple[KhiveMcpSession, dict[str, str]]: """Open a KhiveMcpSession and load all corpus memories via remember(). The returned session is already entered (via __enter__). The caller MUST call session.close() when done, or use a try/finally block. + For v1 corpus the key is the integer index (as a string). + For v2 corpus the key is the corpus ID string (e.g. "mem_001"). + Returns: - (session, note_id_map) where note_id_map[corpus_index] = note_id string. + (session, note_id_map) where note_id_map[corpus_key] = khive note_id. 
""" session = KhiveMcpSession( packs=("kg", "memory"), @@ -64,7 +104,7 @@ def setup_session( ) session.__enter__() - note_id_map: dict[int, str] = {} + note_id_map: dict[str, str] = {} total = len(memories) print(f"Loading {total} memories into session...") t_load_start = time.perf_counter() @@ -83,7 +123,12 @@ def setup_session( note_id = result.get("note_id") or result.get("id") if result else None if not note_id: raise RuntimeError(f"remember() returned no note_id for memory {i}: {result!r}") - note_id_map[i] = str(note_id) + + if version == "v2": + corpus_key = mem.get("id", str(i)) + else: + corpus_key = str(i) + note_id_map[corpus_key] = str(note_id) if (i + 1) % 25 == 0: elapsed = time.perf_counter() - t_load_start @@ -99,31 +144,76 @@ def setup_session( # --------------------------------------------------------------------------- +def _resolve_relevant_ids_v1( + eq: dict[str, Any], note_id_map: dict[str, str] +) -> set[str]: + """Resolve v1 relevant_indices to khive note IDs.""" + return {note_id_map[str(i)] for i in eq.get("relevant_indices", []) if str(i) in note_id_map} + + +def _resolve_relevant_ids_v2( + eq: dict[str, Any], note_id_map: dict[str, str] +) -> tuple[list[str], set[str]]: + """Resolve v2 expected_top_k and expected_excluded to khive note IDs. + + Returns: + (expected_top_k_ids, excluded_ids) where each element is a khive note ID. + """ + top_k_ids = [note_id_map[cid] for cid in eq.get("expected_top_k", []) if cid in note_id_map] + excluded_ids = {note_id_map[cid] for cid in eq.get("expected_excluded", []) if cid in note_id_map} + return top_k_ids, excluded_ids + + def evaluate_config( session: KhiveMcpSession, config_dict: dict[str, Any], eval_queries: list[dict[str, Any]], - note_id_map: dict[int, str], + note_id_map: dict[str, str], + *, + version: str = "v1", ) -> dict[str, float]: """Evaluate one RecallConfig against all eval queries. 
- Returns: - {"recall_at_10": float, "mrr": float, "mean_latency_ms": float} + v1 returns: {"recall_at_10", "mrr", "mean_latency_ms"} + v2 returns: {"recall_at_10", "mrr", "mrr_expected", "precision_at_k", + "exclusion_penalty", "combined_score", "mean_latency_ms"} + + mrr_expected (v2): mean reciprocal rank of the FIRST expected_top_k hit. + In a v2 run this equals "mrr": relevant_note_ids is built from the same + expected_top_k IDs, so both rank loops stop at the same first hit. + precision_at_k (v2): fraction of expected_top_k found in the top-k results + where k = len(expected_top_k). Penalises configs that miss specific targets. + exclusion_penalty (v2): fraction of expected_excluded that appear in top-10. + Non-zero means at least one expected_excluded item reached the top-10. + combined_score (v2): 0.5*mrr_expected + 0.3*precision_at_k - 0.2*exclusion_penalty + This is the primary discriminating metric for v2 grid ranking. """ recalls: list[float] = [] mrrs: list[float] = [] latencies: list[float] = [] + # v2-only accumulators + mrrs_expected: list[float] = [] + precisions_at_k: list[float] = [] + exclusion_penalties: list[float] = [] + for eq in eval_queries: query: str = eq["query"] - relevant_indices: list[int] = eq["relevant_indices"] - relevant_note_ids = {note_id_map[i] for i in relevant_indices if i in note_id_map} + + if version == "v2": + expected_top_k_ids, excluded_ids = _resolve_relevant_ids_v2(eq, note_id_map) + relevant_note_ids = set(expected_top_k_ids) + limit = max(10, len(expected_top_k_ids)) + else: + relevant_note_ids = _resolve_relevant_ids_v1(eq, note_id_map) + excluded_ids = set() + limit = 10 t0 = time.perf_counter() try: hits = session.verb( "recall", - {"query": query, "limit": 10, "config": config_dict}, + {"query": query, "limit": limit, "config": config_dict}, ) except Exception: hits = [] @@ -137,29 +227,70 @@ def evaluate_config( if nid: retrieved_ids.append(str(nid)) - # recall@10 retrieved_set = set(retrieved_ids) + top10_set = 
set(retrieved_ids[:10]) + + # --- recall@10 (both versions) --- if relevant_note_ids: - r_at_10 = len(relevant_note_ids & retrieved_set) / len(relevant_note_ids) + r_at_10 = len(relevant_note_ids & top10_set) / len(relevant_note_ids) else: r_at_10 = 0.0 recalls.append(r_at_10) - # MRR — reciprocal rank of first relevant hit - mrr = 0.0 + # --- MRR v1: reciprocal rank of first ANY relevant hit --- + mrr_v1 = 0.0 for rank, nid in enumerate(retrieved_ids, 1): if nid in relevant_note_ids: - mrr = 1.0 / rank + mrr_v1 = 1.0 / rank break - mrrs.append(mrr) + mrrs.append(mrr_v1) + + if version == "v2": + # --- MRR_expected: reciprocal rank of first expected_top_k hit --- + mrr_exp = 0.0 + for rank, nid in enumerate(retrieved_ids, 1): + if nid in set(expected_top_k_ids): + mrr_exp = 1.0 / rank + break + mrrs_expected.append(mrr_exp) + + # --- precision@k: fraction of expected_top_k in top-k results --- + k = len(expected_top_k_ids) + if k > 0: + top_k_retrieved = set(retrieved_ids[:k]) + prec_at_k = len(set(expected_top_k_ids) & top_k_retrieved) / k + else: + prec_at_k = 0.0 + precisions_at_k.append(prec_at_k) + + # --- exclusion_penalty: distractors surfaced in top-10 --- + if excluded_ids: + penalty = len(excluded_ids & top10_set) / len(excluded_ids) + else: + penalty = 0.0 + exclusion_penalties.append(penalty) n = len(eval_queries) - return { + base: dict[str, float] = { "recall_at_10": sum(recalls) / n if n else 0.0, "mrr": sum(mrrs) / n if n else 0.0, "mean_latency_ms": sum(latencies) / n if n else 0.0, } + if version == "v2": + mrr_exp_mean = sum(mrrs_expected) / n if n else 0.0 + prec_mean = sum(precisions_at_k) / n if n else 0.0 + excl_mean = sum(exclusion_penalties) / n if n else 0.0 + combined = _MRR_WEIGHT * mrr_exp_mean + _PREC_WEIGHT * prec_mean - _EXCL_WEIGHT * excl_mean + base.update({ + "mrr_expected": mrr_exp_mean, + "precision_at_k": prec_mean, + "exclusion_penalty": excl_mean, + "combined_score": combined, + }) + + return base + # 
--------------------------------------------------------------------------- # Grid generation @@ -169,8 +300,12 @@ def evaluate_config( def generate_grid(quick: bool = False) -> list[dict[str, Any]]: """Generate the FTS-only RecallConfig parameter grid. - Full grid: 4 × 4 × 8 × 3 × 3 = 1152 configs - Quick grid: every 10th config (deterministic sort) ≈ 116 configs + Full grid (v2): 4 × 4 × 8 × 3 × 3 × 2 = 2304 configs + Quick grid: every 10th config of each min_salience half ≈ 232 configs (116 per value) + + v2 adds min_salience variation which DOES discriminate on the v2 corpus: + - min_salience=0.0: retrieves all items including low-importance traps + - min_salience=0.4: filters out low-importance items (0.28-0.35) from results Weight triples are normalized so relevance+importance+temporal = 1.0. Weighted fusion uses [text_weight, vector_weight] where alpha=vector_weight. @@ -209,29 +344,46 @@ def generate_grid(quick: bool = False) -> list[dict[str, Any]]: decay_models = ["exponential", "hyperbolic", "none"] half_lives = [14.0, 30.0, 60.0] + # min_salience variation: the key discriminating dimension for v2 corpus. + # 0.0 includes all items (even low-importance traps). + # 0.40 filters out importance_trap memories (importance 0.26-0.35) from results, + # which FAILS importance_trap queries (those expected items have salience < 0.40). + # This creates measurable discrimination: configs with min_salience=0.0 find trap + # items; configs with min_salience=0.40 do not. 
+ min_salience_values = [0.0, 0.40] + configs: list[dict[str, Any]] = [] for rw, iw, tw in weight_triples: for cm, cl in candidate_pools: for fuse in fusion_configs: for decay in decay_models: for hl in half_lives: - cfg: dict[str, Any] = { - "relevance_weight": rw, - "importance_weight": iw, - "temporal_weight": tw, - "candidate_multiplier": cm, - "fuse_strategy": fuse, - "decay_model": decay, - "temporal_half_life_days": hl, - "min_score": 0.0, - "min_salience": 0.0, - } - if cl is not None: - cfg["candidate_limit"] = cl - configs.append(cfg) + for ms in min_salience_values: + cfg: dict[str, Any] = { + "relevance_weight": rw, + "importance_weight": iw, + "temporal_weight": tw, + "candidate_multiplier": cm, + "fuse_strategy": fuse, + "decay_model": decay, + "temporal_half_life_days": hl, + "min_score": 0.0, + "min_salience": ms, + } + if cl is not None: + cfg["candidate_limit"] = cl + configs.append(cfg) if quick: - configs = configs[::10] + # Sample the grid to get ~232 configs while preserving both min_salience values. + # Because min_salience alternates as the innermost dimension, we take every 10th + # config from the even positions (ms=0.0 group) and every 10th from the odd + # positions (ms=0.4 group), then concatenate them (ms=0.0 first). This ensures both values appear. + ms0_configs = configs[::2] # every even index = min_salience=0.0 + ms04_configs = configs[1::2] # every odd index = min_salience=0.40 + sampled_ms0 = ms0_configs[::10] # ~116 configs + sampled_ms04 = ms04_configs[::10] # ~116 configs + configs = sampled_ms0 + sampled_ms04 # ~232 total return configs @@ -245,7 +397,9 @@ def run_grid( session: KhiveMcpSession, grid: list[dict[str, Any]], eval_queries: list[dict[str, Any]], - note_id_map: dict[int, str], + note_id_map: dict[str, str], + *, + version: str = "v1", ) -> list[dict[str, Any]]: """Run evaluate_config for every config in the grid. @@ -253,7 +407,7 @@ def run_grid( Prints progress every 100 configs. 
Returns: - List of result dicts: {"config_index", "config", "recall_at_10", "mrr", "mean_latency_ms"} + List of result dicts with metrics appropriate to the corpus version. """ results: list[dict[str, Any]] = [] total = len(grid) @@ -261,7 +415,7 @@ def run_grid( for i, config in enumerate(grid): if i % 100 == 0: print(f" [{i}/{total}] config {i}...") - metrics = evaluate_config(session, config, eval_queries, note_id_map) + metrics = evaluate_config(session, config, eval_queries, note_id_map, version=version) results.append( { "config_index": i, @@ -297,28 +451,39 @@ def write_results( output_dir: Path, *, t_total_seconds: float, + n_eval_queries: int, default_config_metrics: dict[str, float] | None = None, + version: str = "v1", + report_filename: str = "REPORT.md", ) -> None: - """Write results.json, tuned-config.toml, and REPORT.md to output_dir.""" + """Write results.json, tuned-config.toml, and REPORT.md (or REPORT-v2.md) to output_dir.""" output_dir.mkdir(parents=True, exist_ok=True) t_total = t_total_seconds today = date.today().isoformat() # --- results.json --- - (output_dir / "results.json").write_text(json.dumps(results, indent=2)) - print(f"Wrote {output_dir / 'results.json'} ({len(results)} configs)") - - # --- rank by recall@10 then MRR --- - sorted_by_recall = sorted( - results, key=lambda r: (r["recall_at_10"], r["mrr"]), reverse=True + results_filename = "results.json" if version == "v1" else "results_v2.json" + (output_dir / results_filename).write_text(json.dumps(results, indent=2)) + print(f"Wrote {output_dir / results_filename} ({len(results)} configs)") + + # --- choose primary sort key --- + if version == "v2": + primary_sort_key = "combined_score" + else: + primary_sort_key = "recall_at_10" + + sorted_primary = sorted( + results, key=lambda r: (r[primary_sort_key], r.get("mrr", 0.0)), reverse=True ) sorted_by_mrr = sorted( - results, key=lambda r: (r["mrr"], r["recall_at_10"]), reverse=True + results, key=lambda r: (r.get("mrr_expected", 
r.get("mrr", 0.0)), r[primary_sort_key]), + reverse=True, ) - winner = sorted_by_recall[0] + winner = sorted_primary[0] cfg = winner["config"] # --- tuned-config.toml --- + toml_filename = "tuned-config.toml" if version == "v1" else "tuned-config-v2.toml" fuse_toml = _fuse_to_toml(cfg["fuse_strategy"]) decay_model_str = cfg["decay_model"] if isinstance(cfg["decay_model"], str) else json.dumps(cfg["decay_model"]) cl_line = ( @@ -326,11 +491,18 @@ def write_results( if cfg.get("candidate_limit") is not None else "# candidate_limit = null (use multiplier only)" ) + score_comment = ( + f"# combined_score = {winner['combined_score']:.4f} " + f"(mrr_expected={winner['mrr_expected']:.4f} precision_at_k={winner['precision_at_k']:.4f} " + f"exclusion_penalty={winner['exclusion_penalty']:.4f})" + if version == "v2" + else f"# recall_at_10 = {winner['recall_at_10']:.4f}" + ) toml_content = f"""\ # Winning config from khive recall param-tuning grid search # run_date = "{today}" -# recall_at_10 = {winner['recall_at_10']:.4f} -# mrr = {winner['mrr']:.4f} +{score_comment} +# mrr = {winner.get('mrr', 0.0):.4f} # mean_latency_ms = {winner['mean_latency_ms']:.2f} [recall] @@ -345,11 +517,11 @@ def write_results( min_score = {cfg['min_score']} min_salience = {cfg['min_salience']} """ - (output_dir / "tuned-config.toml").write_text(toml_content) - print(f"Wrote {output_dir / 'tuned-config.toml'}") + (output_dir / toml_filename).write_text(toml_content) + print(f"Wrote {output_dir / toml_filename}") # --- REPORT.md --- - top10_recall = sorted_by_recall[:10] + top10_primary = sorted_primary[:10] top10_mrr = sorted_by_mrr[:10] def _cfg_summary(r: dict[str, Any]) -> str: @@ -363,24 +535,58 @@ def _cfg_summary(r: dict[str, Any]) -> str: else: fuse_str = str(fuse) decay_str = c["decay_model"] if isinstance(c["decay_model"], str) else json.dumps(c["decay_model"]) + ms = c.get("min_salience", 0.0) return ( f"rel={c['relevance_weight']} imp={c['importance_weight']} " 
f"tmp={c['temporal_weight']} cand={c['candidate_multiplier']} " - f"fuse={fuse_str} decay={decay_str} hl={c['temporal_half_life_days']}" + f"fuse={fuse_str} decay={decay_str} hl={c['temporal_half_life_days']} " + f"ms={ms}" ) - def _row(r: dict[str, Any]) -> str: - return ( - f"| {r['config_index']:4d} | {r['recall_at_10']:.4f} | {r['mrr']:.4f} " - f"| {r['mean_latency_ms']:.1f}ms | {_cfg_summary(r)} |" - ) + if version == "v2": + def _row(r: dict[str, Any]) -> str: + return ( + f"| {r['config_index']:4d} " + f"| {r['combined_score']:.4f} " + f"| {r['mrr_expected']:.4f} " + f"| {r['precision_at_k']:.4f} " + f"| {r['exclusion_penalty']:.4f} " + f"| {r['mean_latency_ms']:.1f}ms " + f"| {_cfg_summary(r)} |" + ) + header = "| idx | combined | mrr_exp | prec@k | excl_pen | latency | config |" + divider = "|-----|---------|---------|--------|----------|---------|--------|" + else: + def _row(r: dict[str, Any]) -> str: + return ( + f"| {r['config_index']:4d} | {r['recall_at_10']:.4f} | {r['mrr']:.4f} " + f"| {r['mean_latency_ms']:.1f}ms | {_cfg_summary(r)} |" + ) + header = "| idx | recall@10 | mrr | latency | config |" + divider = "|-----|-----------|-----|---------|--------|" - top10_recall_rows = "\n".join(_row(r) for r in top10_recall) + top10_primary_rows = "\n".join(_row(r) for r in top10_primary) top10_mrr_rows = "\n".join(_row(r) for r in top10_mrr) default_section = "" if default_config_metrics: - default_section = f""" + if version == "v2": + default_section = f""" +## Default vs Tuned Comparison + +| Metric | Default config | Tuned config | Delta | +|--------|---------------|-------------|-------| +| combined_score | {default_config_metrics['combined_score']:.4f} | {winner['combined_score']:.4f} | {winner['combined_score'] - default_config_metrics['combined_score']:+.4f} | +| mrr_expected | {default_config_metrics['mrr_expected']:.4f} | {winner['mrr_expected']:.4f} | {winner['mrr_expected'] - default_config_metrics['mrr_expected']:+.4f} | +| precision_at_k | 
{default_config_metrics['precision_at_k']:.4f} | {winner['precision_at_k']:.4f} | {winner['precision_at_k'] - default_config_metrics['precision_at_k']:+.4f} | +| exclusion_penalty | {default_config_metrics['exclusion_penalty']:.4f} | {winner['exclusion_penalty']:.4f} | {winner['exclusion_penalty'] - default_config_metrics['exclusion_penalty']:+.4f} | +| recall_at_10 | {default_config_metrics['recall_at_10']:.4f} | {winner['recall_at_10']:.4f} | {winner['recall_at_10'] - default_config_metrics['recall_at_10']:+.4f} | +| mean latency | {default_config_metrics['mean_latency_ms']:.1f}ms | {winner['mean_latency_ms']:.1f}ms | {winner['mean_latency_ms'] - default_config_metrics['mean_latency_ms']:+.1f}ms | + +Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 +""" + else: + default_section = f""" ## Default vs Tuned Comparison | Metric | Default config | Tuned config | Delta | @@ -392,40 +598,71 @@ def _row(r: dict[str, Any]) -> str: Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 """ + if version == "v2": + winning_metrics_table = f"""\ +| combined_score | {winner['combined_score']:.4f} | +| mrr_expected | {winner['mrr_expected']:.4f} | +| precision_at_k | {winner['precision_at_k']:.4f} | +| exclusion_penalty | {winner['exclusion_penalty']:.4f} | +| recall_at_10 | {winner['recall_at_10']:.4f} | +| mrr (v1) | {winner['mrr']:.4f} | +| mean latency | {winner['mean_latency_ms']:.1f}ms | +| config_index | {winner['config_index']} |""" + # compute score ranges to document discrimination + combined_scores = sorted(set(round(r["combined_score"], 4) for r in results)) + mrr_exp_scores = sorted(set(round(r["mrr_expected"], 4) for r in results)) + prec_scores = sorted(set(round(r["precision_at_k"], 4) for r in results)) + discrimination_section = f""" +## Discrimination Analysis (v2 corpus) + +| Metric | Distinct 
values | Min | Max | Range | +|--------|-----------------|-----|-----|-------| +| combined_score | {len(combined_scores)} | {min(combined_scores):.4f} | {max(combined_scores):.4f} | {max(combined_scores) - min(combined_scores):.4f} | +| mrr_expected | {len(mrr_exp_scores)} | {min(mrr_exp_scores):.4f} | {max(mrr_exp_scores):.4f} | {max(mrr_exp_scores) - min(mrr_exp_scores):.4f} | +| precision_at_k | {len(prec_scores)} | {min(prec_scores):.4f} | {max(prec_scores):.4f} | {max(prec_scores) - min(prec_scores):.4f} | + +A non-flat landscape requires combined_score range > 0.05 across configs. +""" + else: + winning_metrics_table = f"""\ +| recall@10 | {winner['recall_at_10']:.4f} | +| MRR | {winner['mrr']:.4f} | +| mean latency | {winner['mean_latency_ms']:.1f}ms | +| config_index | {winner['config_index']} |""" + discrimination_section = "" + report = f"""\ # Param-Tuning Grid Search Report - **Date**: {today} +- **Corpus version**: {version} - **Grid size**: {len(results)} configs -- **Eval queries**: 20 +- **Eval queries**: {n_eval_queries} - **Total runtime**: {t_total:.1f}s - **Mode**: FTS-only (no_embed=True) -## Winning Config (highest recall@10) +## Winning Config (highest {primary_sort_key}) | Metric | Value | |--------|-------| -| recall@10 | {winner['recall_at_10']:.4f} | -| MRR | {winner['mrr']:.4f} | -| mean latency | {winner['mean_latency_ms']:.1f}ms | -| config_index | {winner['config_index']} | +{winning_metrics_table} Parameters: `{_cfg_summary(winner)}` -{default_section} -## Top 10 by recall@10 +{default_section}{discrimination_section} +## Top 10 by {primary_sort_key} -| idx | recall@10 | mrr | latency | config | -|-----|-----------|-----|---------|--------| -{top10_recall_rows} +{header} +{divider} +{top10_primary_rows} ## Top 10 by MRR -| idx | recall@10 | mrr | latency | config | -|-----|-----------|-----|---------|--------| +{header} +{divider} {top10_mrr_rows} """ - (output_dir / "REPORT.md").write_text(report) - print(f"Wrote {output_dir / 
'REPORT.md'}") + (output_dir / report_filename).write_text(report) + print(f"Wrote {output_dir / report_filename}") # --------------------------------------------------------------------------- @@ -464,7 +701,7 @@ def main() -> None: "--corpus", type=Path, default=DEFAULT_CORPUS, - help="Path to memories_corpus.json fixture.", + help="Path to memories corpus JSON fixture (v1 or v2 schema auto-detected).", ) args = parser.parse_args() @@ -475,41 +712,67 @@ def main() -> None: raise FileNotFoundError(f"Corpus not found: {corpus_path}") print(f"Loading corpus from {corpus_path}") - memories, eval_queries = load_corpus(corpus_path) - print(f"Corpus: {len(memories)} memories, {len(eval_queries)} eval queries") + memories, eval_queries, version = load_corpus(corpus_path) + print(f"Corpus: {len(memories)} memories, {len(eval_queries)} eval queries (schema={version})") grid = generate_grid(quick=args.quick) print(f"Grid: {len(grid)} configs (quick={args.quick})") t_start = time.perf_counter() - session, note_id_map = setup_session(memories) + session, note_id_map = setup_session(memories, version=version) try: # Evaluate default config for the comparison table - default_metrics = evaluate_config(session, _DEFAULT_CONFIG, eval_queries, note_id_map) - print( - f"Default config: recall@10={default_metrics['recall_at_10']:.4f} " - f"mrr={default_metrics['mrr']:.4f}" + default_metrics = evaluate_config( + session, _DEFAULT_CONFIG, eval_queries, note_id_map, version=version ) + if version == "v2": + print( + f"Default config: combined={default_metrics['combined_score']:.4f} " + f"mrr_exp={default_metrics['mrr_expected']:.4f} " + f"prec@k={default_metrics['precision_at_k']:.4f}" + ) + else: + print( + f"Default config: recall@10={default_metrics['recall_at_10']:.4f} " + f"mrr={default_metrics['mrr']:.4f}" + ) - results = run_grid(session, grid, eval_queries, note_id_map) + results = run_grid(session, grid, eval_queries, note_id_map, version=version) finally: session.close() 
t_elapsed = time.perf_counter() - t_start print(f"Grid search complete in {t_elapsed:.1f}s") + report_filename = "REPORT-v2.md" if version == "v2" else "REPORT.md" write_results( results, output_dir, t_total_seconds=t_elapsed, + n_eval_queries=len(eval_queries), default_config_metrics=default_metrics, + version=version, + report_filename=report_filename, ) - best = max(results, key=lambda r: (r["recall_at_10"], r["mrr"])) - print( - f"\nBest config: recall@10={best['recall_at_10']:.4f} mrr={best['mrr']:.4f} " - f"(index {best['config_index']})" - ) + if version == "v2": + best = max(results, key=lambda r: (r["combined_score"], r.get("mrr_expected", 0.0))) + combined_scores = [r["combined_score"] for r in results] + print( + f"\nBest config: combined={best['combined_score']:.4f} " + f"mrr_exp={best['mrr_expected']:.4f} prec@k={best['precision_at_k']:.4f} " + f"(index {best['config_index']})" + ) + print( + f"Score range: [{min(combined_scores):.4f}, {max(combined_scores):.4f}] " + f"(range={max(combined_scores) - min(combined_scores):.4f})" + ) + else: + best = max(results, key=lambda r: (r["recall_at_10"], r["mrr"])) + print( + f"\nBest config: recall@10={best['recall_at_10']:.4f} mrr={best['mrr']:.4f} " + f"(index {best['config_index']})" + ) print(f"Results written to {output_dir}")