diff --git a/crates/khive-pack-memory/src/config.rs b/crates/khive-pack-memory/src/config.rs index 103faa5..52d9874 100644 --- a/crates/khive-pack-memory/src/config.rs +++ b/crates/khive-pack-memory/src/config.rs @@ -51,6 +51,11 @@ pub struct RecallConfig { pub fallback_during_migration: bool, } +// Tuning artifact: tests/khive-contract/tune/ swept 116 configs but the synthetic corpus +// produced an identical recall@10 = 0.9333 for every config — i.e. a flat landscape that +// cannot empirically distinguish these parameters. Defaults below stay at the prior values +// until a harder corpus (embed-enabled, synonym queries, partial matches) provides signal. +// See tests/khive-contract/tune/REPORT.md for the analysis. impl Default for RecallConfig { fn default() -> Self { Self { diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 3f83b81..fed761e 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -813,6 +813,7 @@ impl MemoryPack { #[cfg(test)] mod tests { use super::*; + use crate::config::DecayModel; #[test] fn validate_memory_type_rejects_invalid() { @@ -1046,9 +1047,14 @@ mod tests { #[test] fn compute_score_exponential_decay_at_decay_factor_half_life() { - let cfg = RecallConfig::default(); // temporal_half_life = 30 days, default decay_factor=0.01 - // ADR-021 §5: importance_decayed = salience * exp(-decay_factor * age_days) - // At age = ln(2)/0.01 ≈ 69.3 days: importance_decayed ≈ 0.5 + // Use explicit exponential decay config — not relying on default decay_model. 
+ // ADR-021 §5: importance_decayed = salience * exp(-decay_factor * age_days) + // At age = ln(2)/0.01 ≈ 69.3 days: importance_decayed ≈ 0.5 + let cfg = RecallConfig { + decay_model: DecayModel::Exponential, + temporal_half_life_days: 30.0, + ..RecallConfig::default() + }; let age_days = std::f64::consts::LN_2 / 0.01; let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, age_days); assert!( @@ -1063,7 +1069,11 @@ mod tests { #[test] fn compute_score_temporal_halves_at_temporal_half_life() { - let cfg = RecallConfig::default(); // temporal_half_life = 30 days + // Use explicit half_life=30 — not relying on default temporal_half_life_days. + let cfg = RecallConfig { + temporal_half_life_days: 30.0, + ..RecallConfig::default() + }; let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, 30.0); // At age = temporal_half_life = 30 days: temporal = exp(-ln2/30 * 30) = 0.5 assert!( diff --git a/tests/khive-contract/fixtures/memories_corpus.json b/tests/khive-contract/fixtures/memories_corpus.json new file mode 100644 index 0000000..b78dba2 --- /dev/null +++ b/tests/khive-contract/fixtures/memories_corpus.json @@ -0,0 +1,806 @@ +{ + "memories": [ + { + "content": "Python list comprehension is a concise syntax for creating lists from iterables, equivalent to map and filter operations in functional programming", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "python", "functional"] + }, + { + "content": "Python lambda functions enable functional programming patterns including map, filter, and reduce operations on collections", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "python", "functional"] + }, + { + "content": "Python decorators wrap functions to add behavior like caching, logging, or access control without modifying the original function code", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "python"] + }, + { + 
"content": "Rust ownership system enforces memory safety at compile time through borrow checker rules that prevent use-after-free and data races", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["programming", "rust", "memory-safety"] + }, + { + "content": "Rust borrow checker ensures only one mutable reference or multiple immutable references exist at a time, preventing memory safety violations", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["programming", "rust", "borrow-checker"] + }, + { + "content": "Rust lifetimes are annotations that tell the borrow checker how long references are valid, enabling safe memory management without garbage collection", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "rust", "lifetimes"] + }, + { + "content": "Binary search trees enable O(log n) lookup, insertion, and deletion by maintaining sorted order in left and right subtrees", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "data-structures"] + }, + { + "content": "AVL trees are self-balancing binary search trees that maintain height balance to guarantee O(log n) operations via rotation algorithms", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "data-structures"] + }, + { + "content": "Hash tables provide O(1) average-case lookup by mapping keys to array indices using a hash function and collision resolution strategy", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "data-structures"] + }, + { + "content": "Unit testing verifies individual functions and methods in isolation, forming the foundation of test-driven development TDD methodology", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", 
"testing", "tdd"] + }, + { + "content": "Test-driven development TDD requires writing failing unit tests before implementing production code, then refactoring once all tests pass green", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["programming", "testing", "tdd"] + }, + { + "content": "Integration tests verify that multiple software components work correctly together as a system, complementing unit tests in a complete test suite", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "testing"] + }, + { + "content": "Debugging with stack traces reveals the call hierarchy at the point of exception failure, helping identify the root cause of bugs", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "debugging"] + }, + { + "content": "A debugging tool called a debugger allows setting breakpoints, stepping through code execution line by line, and inspecting variable values to trace bugs", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "debugging"] + }, + { + "content": "JavaScript async await syntax simplifies asynchronous programming by allowing sequential-looking code for promise-based operations", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "javascript", "async"] + }, + { + "content": "JavaScript Promises represent the eventual completion or failure of asynchronous operations and allow chaining with then and catch methods", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "javascript", "async"] + }, + { + "content": "Git version control tracks changes to source code over time, enabling branching, merging, and collaborative development workflows", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "git"] + }, + { + 
"content": "Docker containers package application code with its runtime dependencies into isolated portable environments for consistent deployment", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "devops"] + }, + { + "content": "RESTful APIs use HTTP methods GET POST PUT DELETE to perform CRUD operations on resources identified by uniform resource identifiers", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "api", "rest"] + }, + { + "content": "SQL JOIN operations combine rows from two or more database tables based on related columns to query relational data", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "database", "sql"] + }, + { + "content": "Dynamic programming solves optimization problems by breaking them into overlapping subproblems and caching intermediate results through memoization", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms"] + }, + { + "content": "Graph traversal algorithms breadth-first search BFS and depth-first search DFS explore all nodes in a connected graph systematically", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "graphs"] + }, + { + "content": "Regular expressions are patterns for matching searching and manipulating strings using special character classes quantifiers and anchors", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["programming", "regex"] + }, + { + "content": "Sorting algorithms like quicksort mergesort and heapsort have different time complexity and space trade-offs for ordering data", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "sorting"] + }, + { + "content": "Object-oriented programming uses classes inheritance 
polymorphism and encapsulation to organize code around data structures and behavior", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "oop"] + }, + { + "content": "Derivatives measure the instantaneous rate of change of a mathematical function, forming the foundation of differential calculus", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "calculus"] + }, + { + "content": "Integration in calculus computes the area under a curve and is used to find antiderivatives, representing accumulated change", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "calculus"] + }, + { + "content": "The fundamental theorem of calculus links differentiation and integration showing they are inverse mathematical operations", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["math", "calculus"] + }, + { + "content": "Matrix multiplication combines two matrices to produce a new matrix and is fundamental to linear algebra and computer graphics transformations", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "linear-algebra"] + }, + { + "content": "Eigenvalues and eigenvectors of a matrix reveal its principal axes of transformation and are central to linear algebra applications", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "linear-algebra"] + }, + { + "content": "Linear algebra operations including matrix inversion determinants and vector spaces underlie machine learning algorithms and data analysis", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "linear-algebra"] + }, + { + "content": "Neural networks learn complex patterns by adjusting connection weights through backpropagation and gradient descent optimization algorithms", + "importance": 0.90, + "decay_factor": 0.01, 
+ "memory_type": "semantic", + "tags": ["science", "machine-learning", "neural-networks"] + }, + { + "content": "Deep learning uses multiple hidden layers in neural networks to learn hierarchical feature representations from raw input data", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["science", "machine-learning", "deep-learning"] + }, + { + "content": "Gradient descent minimizes a loss function by iteratively adjusting model parameters in the direction of the negative gradient", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "machine-learning"] + }, + { + "content": "Normal distribution is characterized by mean and standard deviation and is central to statistics and the central limit theorem", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "statistics", "probability"] + }, + { + "content": "Bayesian probability interprets probability as a degree of belief updated using Bayes theorem when new evidence arrives", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "statistics", "probability", "bayesian"] + }, + { + "content": "Statistical hypothesis testing uses probability p-values and confidence intervals to determine if statistical evidence supports a research claim", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "statistics"] + }, + { + "content": "Quantum mechanics describes particle behavior through wave functions and probability amplitudes at atomic and subatomic energy scales", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["science", "physics", "quantum"] + }, + { + "content": "Heisenberg uncertainty principle in quantum mechanics states that position and momentum cannot both be measured precisely simultaneously", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + 
"tags": ["science", "physics", "quantum"] + }, + { + "content": "Special relativity states that the speed of light is constant in all inertial reference frames and that time and space are relative", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics", "relativity"] + }, + { + "content": "Photosynthesis converts carbon dioxide and water into glucose and oxygen using solar energy captured by chlorophyll in plant cells", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology"] + }, + { + "content": "DNA double helix encodes genetic information through base pair sequences of adenine thymine guanine and cytosine nucleotides", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology", "genetics"] + }, + { + "content": "CRISPR gene editing technology allows precise modification of DNA sequences by cutting at targeted genomic locations guided by RNA", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology", "genetics"] + }, + { + "content": "Chemical bonding forms molecules through sharing of electrons in covalent bonds or transfer of electrons in ionic bonds between atoms", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "chemistry"] + }, + { + "content": "Thermodynamics laws govern energy transfer stating that energy is conserved and entropy always increases in isolated physical systems", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics", "thermodynamics"] + }, + { + "content": "Newton laws of motion describe how forces cause changes in velocity and acceleration of objects in classical mechanics", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics"] + }, + { + "content": "Electromagnetism describes how electric 
charges and magnetic fields interact and propagate, unified by Maxwell equations", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics"] + }, + { + "content": "Black holes form when massive stars gravitationally collapse creating regions where gravity is so strong that light cannot escape", + "importance": 0.75, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["science", "physics", "astrophysics"] + }, + { + "content": "Climate change results from greenhouse gas emissions trapping solar heat in the atmosphere causing rising global temperatures", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "environment"] + }, + { + "content": "Evolutionary theory explains biodiversity through natural selection mutation and genetic drift acting over millions of years", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology", "evolution"] + }, + { + "content": "The Roman Empire at its height controlled the Mediterranean Sea Gaul Britain and North Africa spreading Latin culture and Roman law", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "rome", "ancient"] + }, + { + "content": "Julius Caesar assassination in 44 BC marked the end of the Roman Republic and the beginning of the Roman Empire under Augustus", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "rome", "ancient"] + }, + { + "content": "Ancient Rome and the Roman Empire built the Colosseum aqueducts and extensive road networks leaving lasting architectural legacies", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "rome", "ancient"] + }, + { + "content": "World War II began in 1939 when Germany invaded Poland drawing Britain and France into conflict with the Nazi regime", + "importance": 0.85, + "decay_factor": 
0.02, + "memory_type": "semantic", + "tags": ["history", "world-war-ii"] + }, + { + "content": "The Battle of Stalingrad 1942 to 1943 was a turning point in World War II ending German eastward advance into the Soviet Union", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "world-war-ii", "battles"] + }, + { + "content": "D-Day on June 6 1944 saw Allied forces land on Normandy beaches in the largest seaborne invasion in World War II history", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["history", "world-war-ii", "battles"] + }, + { + "content": "South America contains twelve sovereign countries with Brazil occupying nearly half the continent and the vast Amazon rainforest", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "south-america"] + }, + { + "content": "The Andes mountain range runs along the western coast of South America hosting ancient Andean civilizations and diverse ecosystems", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "south-america"] + }, + { + "content": "Argentina and Chile share the southern tip of South America including Patagonia with the Andes mountains forming their natural border", + "importance": 0.70, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["geography", "south-america"] + }, + { + "content": "Ancient Egypt was ruled by pharaohs who built pyramids as royal tombs most famously the Great Pyramid of Giza as a tomb", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "egypt", "ancient"] + }, + { + "content": "The ancient Egypt pharaoh Tutankhamun tomb discovered in 1922 contained vast treasures providing insight into Egyptian civilization", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "egypt", "pharaoh"] + }, + { + "content": 
"Ancient Egypt hieroglyphics were a writing system using pictographic symbols deciphered using the Rosetta Stone in 1822", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "egypt", "ancient"] + }, + { + "content": "The Renaissance was a cultural and intellectual revival in 14th to 17th century Europe centered in Italy featuring humanist values and arts", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "renaissance", "art"] + }, + { + "content": "Leonardo da Vinci epitomized Renaissance ideals combining painting sculpture architecture science and engineering in his masterwork", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "renaissance", "art"] + }, + { + "content": "Michelangelo Sistine Chapel ceiling and David sculpture are masterpieces of Renaissance art commissioned by the Catholic Church", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "renaissance", "art"] + }, + { + "content": "The Silk Road was an ancient network of trade routes connecting China to Rome facilitating exchange of goods ideas and culture", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "trade"] + }, + { + "content": "The Mongol Empire under Genghis Khan was the largest contiguous land empire spanning from Asia to Eastern Europe in history", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "mongols"] + }, + { + "content": "The Ottoman Empire controlled Anatolia the Middle East and North Africa for six centuries until its dissolution after World War I", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "ottoman"] + }, + { + "content": "The Industrial Revolution began in Britain in the 18th century transforming manufacturing through steam power and 
mechanization", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "industrial"] + }, + { + "content": "The French Revolution of 1789 abolished the monarchy and aristocracy introducing ideals of liberty equality and fraternity", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "revolution"] + }, + { + "content": "Christopher Columbus 1492 voyage to the Americas opened sustained contact between Europe and the Western Hemisphere continents", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "exploration"] + }, + { + "content": "The Ming Dynasty of China built Great Wall extensions and launched Zheng He naval expeditions across Asia and Africa", + "importance": 0.70, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["history", "china"] + }, + { + "content": "The Black Death plague killed an estimated one third of Europe population in the 14th century reshaping society and economy", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "plague"] + }, + { + "content": "The American Civil War 1861 to 1865 was fought over slavery and states rights resulting in the abolition of slavery", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "america"] + }, + { + "content": "Ancient Greece developed democracy philosophy through Socrates and Plato and the Olympic Games in city-states like Athens", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "greece", "ancient"] + }, + { + "content": "Cooking pasta requires bringing heavily salted water to a full boil before adding pasta and timing precisely for al dente texture", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", "cooking", "pasta"] + }, + { + "content": "Pasta sauce techniques include tomato 
reduction cream-based and oil-and-garlic preparations using fresh or dried Italian herbs", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", "cooking", "pasta"] + }, + { + "content": "Italian pasta recipes use olive oil garlic parmesan cheese and fresh basil for authentic Mediterranean flavor and aroma", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["food", "cooking", "italian"] + }, + { + "content": "Sleep quality improves with consistent bedtime routines dark rooms and avoiding screens one hour before sleeping for better rest", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "episodic", + "tags": ["health", "sleep"] + }, + { + "content": "Circadian rhythm is the body internal 24-hour biological clock regulating sleep and wake cycles influenced by light exposure", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "sleep", "biology"] + }, + { + "content": "REM sleep and deep sleep stages are essential for memory consolidation muscle recovery and mental health restoration each night", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "sleep"] + }, + { + "content": "Regular aerobic fitness exercise like running cycling and swimming improves cardiovascular health and reduces stress hormones", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "fitness", "exercise"] + }, + { + "content": "Fitness strength training with weights builds muscle mass increases metabolism and improves bone density through progressive overload", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "fitness", "strength-training"] + }, + { + "content": "High-intensity interval training HIIT alternates short intense effort bursts with recovery periods for efficient calorie burning and fitness", + "importance": 0.75, + 
"decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "fitness", "hiit"] + }, + { + "content": "Coffee contains caffeine that blocks adenosine receptors in the brain promoting alertness and reducing morning fatigue effectively", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["food", "coffee", "caffeine"] + }, + { + "content": "Morning coffee caffeine ritual often involves grinding fresh beans pour-over or espresso brewing and savoring the rich coffee aroma", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", "coffee", "morning"] + }, + { + "content": "Caffeine half-life is approximately six hours so afternoon coffee can disrupt evening sleep patterns for caffeine sensitive individuals", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["food", "coffee", "caffeine"] + }, + { + "content": "Urban transit commuting by bus and subway reduces individual carbon footprint compared to private car usage in dense metropolitan cities", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["daily-life", "transit", "commute"] + }, + { + "content": "Public transit systems use timetables real-time tracking and fare cards to manage passenger commuter flow efficiently in cities", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["daily-life", "transit"] + }, + { + "content": "Cycling as urban transit commute reduces traffic congestion provides daily physical exercise and combines transportation with fitness", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "episodic", + "tags": ["daily-life", "commute", "cycling"] + }, + { + "content": "Meal planning and batch cooking on weekends reduces weekday decision fatigue and ensures healthy balanced eating throughout the week", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", 
"meal-prep"] + }, + { + "content": "Mindfulness meditation practiced for 10 to 20 minutes daily reduces anxiety improves focus and builds emotional resilience over time", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "mindfulness"] + }, + { + "content": "Reading physical books before bed promotes relaxation and better sleep quality compared to screen-based reading devices at night", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["daily-life", "reading"] + }, + { + "content": "Journaling regularly helps process emotions track personal goals and identify recurring patterns in thoughts and daily behaviors", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["daily-life", "writing"] + }, + { + "content": "Houseplants improve indoor air quality by absorbing carbon dioxide and certain volatile organic compounds pollutants from indoor air", + "importance": 0.55, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["daily-life", "plants"] + }, + { + "content": "Weather affects mood through sunlight exposure influencing serotonin levels and seasonal affective disorder patterns in humans", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["science", "daily-life", "weather"] + }, + { + "content": "Grocery shopping with a prepared list reduces impulse purchases and food waste by focusing on planned meals and needed ingredients", + "importance": 0.55, + "decay_factor": 0.04, + "memory_type": "episodic", + "tags": ["daily-life", "food"] + }, + { + "content": "Fermented foods like yogurt kimchi and kefir contain beneficial probiotics that support gut microbiome health and digestion", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["food", "health"] + }, + { + "content": "Hydration with adequate daily water intake supports kidney function cognitive performance and physical 
exercise endurance", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "hydration"] + }, + { + "content": "Time management techniques like Pomodoro method and time-blocking increase productivity by structuring focused work intervals", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "episodic", + "tags": ["daily-life", "productivity"] + } + ], + "eval_queries": [ + { + "query": "Python functional programming", + "relevant_indices": [0, 1], + "description": "Python functional programming patterns using list comprehensions, lambda, map and filter" + }, + { + "query": "Rust borrow checker memory", + "relevant_indices": [3, 4, 5], + "description": "Rust memory safety enforced by borrow checker through ownership and lifetime rules" + }, + { + "query": "binary search trees", + "relevant_indices": [6, 7], + "description": "Binary search tree and self-balancing AVL tree data structures" + }, + { + "query": "test-driven development", + "relevant_indices": [9, 10], + "description": "TDD workflow of writing failing unit tests before implementing production code" + }, + { + "query": "debugging", + "relevant_indices": [12, 13], + "description": "Debugging techniques using stack traces and interactive debuggers with breakpoints" + }, + { + "query": "calculus", + "relevant_indices": [25, 26, 27], + "description": "Calculus fundamentals: derivatives, integration, and the fundamental theorem" + }, + { + "query": "linear algebra matrix", + "relevant_indices": [28, 29, 30], + "description": "Linear algebra operations including matrix multiplication, eigenvalues and eigenvectors" + }, + { + "query": "neural networks", + "relevant_indices": [31, 32], + "description": "Machine learning with neural networks and deep learning architectures" + }, + { + "query": "probability", + "relevant_indices": [34, 35, 36], + "description": "Probability and statistics including distributions, Bayesian methods, and hypothesis testing" + }, + { 
+ "query": "quantum mechanics", + "relevant_indices": [37, 38], + "description": "Quantum mechanics fundamentals including wave functions and the Heisenberg uncertainty principle" + }, + { + "query": "Roman Empire", + "relevant_indices": [50, 51, 52], + "description": "Ancient Roman civilization spanning the Republic, Julius Caesar, and the Empire" + }, + { + "query": "World War", + "relevant_indices": [53, 54, 55], + "description": "World War II major events: German invasion of Poland, Battle of Stalingrad, and D-Day" + }, + { + "query": "South America", + "relevant_indices": [56, 57, 58], + "description": "South American geography including Brazil, the Andes mountains, and neighboring countries" + }, + { + "query": "ancient Egypt", + "relevant_indices": [59, 60, 61], + "description": "Ancient Egypt civilization with pharaohs, pyramid tombs, and hieroglyphic writing" + }, + { + "query": "Renaissance", + "relevant_indices": [62, 63, 64], + "description": "Italian Renaissance cultural revival and master artists Leonardo da Vinci and Michelangelo" + }, + { + "query": "pasta", + "relevant_indices": [75, 76, 77], + "description": "Pasta cooking techniques, Italian sauce recipes, and key Mediterranean ingredients" + }, + { + "query": "sleep", + "relevant_indices": [78, 79, 80], + "description": "Sleep health including circadian rhythms, REM sleep stages, and bedtime routines" + }, + { + "query": "fitness", + "relevant_indices": [81, 82, 83], + "description": "Physical fitness approaches including aerobic exercise, strength training, and HIIT" + }, + { + "query": "coffee caffeine", + "relevant_indices": [84, 85, 86], + "description": "Coffee and caffeine effects on morning alertness and sleep interactions" + }, + { + "query": "transit", + "relevant_indices": [87, 88, 89], + "description": "Urban transit systems including bus, subway, and cycling for commuting" + } + ] +} diff --git a/tests/khive-contract/tune/README.md b/tests/khive-contract/tune/README.md new file mode 
100644 index 0000000..8972170 --- /dev/null +++ b/tests/khive-contract/tune/README.md @@ -0,0 +1,42 @@ +# Recall parameter tuning + +Grid-search runner for khive recall configuration. Operates against a live +MCP session via the `khive_contract.client.KhiveMcpSession` harness (provided +by the parent `tests/khive-contract/` package). + +## Prerequisites + +This script depends on the `khive_contract` Python package in the parent +directory. Install it first: + +```bash +cd tests/khive-contract +uv pip install -e . +``` + +You'll also need the `khive-mcp` binary on your PATH (the tests/khive-contract +harness spawns it via stdio). + +## Run + +```bash +cd tests/khive-contract +uv run python -m tune --quick # ~10 sec, every 10th config (116 of 1152) +uv run python -m tune # ~2 min, all 1152 configs +uv run python -m tune --output-dir /tmp/my-run # custom output location +``` + +## Outputs + +- `results.json` — all (config, recall@10) tuples +- `tuned-config.toml` — recommended config (synthesized from the best-scoring + set; see REPORT.md for caveats on how meaningful this is) +- `REPORT.md` — analysis writeup + +## Known limitation + +The synthetic eval corpus (`fixtures/memories_corpus.json`) has a ceiling at +recall@10 = 0.9333 for **every** config — i.e., the queries are too easy to +discriminate between parameters. Until a harder corpus exists (embed-enabled, +synonym queries, partial matches), the grid runs but cannot ground default +changes. `RecallConfig::default()` was intentionally NOT changed in this PR.
diff --git a/tests/khive-contract/tune/REPORT.md b/tests/khive-contract/tune/REPORT.md new file mode 100644 index 0000000..4eeb122 --- /dev/null +++ b/tests/khive-contract/tune/REPORT.md @@ -0,0 +1,75 @@ +# Param-Tuning Grid Search Report + +- **Date**: 2026-05-25 +- **Grid size**: 116 configs +- **Eval queries**: 20 +- **Total runtime**: 0.7s +- **Mode**: FTS-only (no_embed=True) + +## Winning Config (highest recall@10) + +| Metric | Value | +|--------|-------| +| recall@10 | 0.9333 | +| MRR | 0.9500 | +| mean latency | 0.3ms | +| config_index | 3 | + +Parameters: `rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0` + +## Default vs Tuned Comparison + +| Metric | Default config | Tuned config | Delta | +|--------|---------------|-------------|-------| +| recall@10 | 0.9333 | 0.9333 | +0.0000 | +| MRR | 0.9250 | 0.9500 | +0.0250 | +| mean latency | 0.3ms | 0.3ms | -0.0ms | + +Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 + +## Flat Optimization Landscape + +All 116 configs achieve **identical** recall@10 = 0.9333. MRR has exactly two values: +0.925 (all RRF + vector-only weighted configs, 58 total) and 0.950 (all other weighted +configs, 58 total). The split is determined entirely by fusion strategy — `relevance_weight`, +`importance_weight`, `temporal_weight`, `candidate_multiplier`, `decay_model`, and +`temporal_half_life_days` have **zero measurable effect** on either metric. + +**Root cause**: The synthetic corpus uses short exact-keyword queries against FTS5 (AND-logic). +Every relevant memory contains the query terms, so FTS5 trivially returns them regardless of +scoring parameters. A harder eval set (synonyms, cross-domain reasoning, partial matches) is +needed to discriminate non-fusion parameters. 
+ +The three candidate default changes (`half_life 30→14`, `decay exp→hyp`, `multiplier 20→10`) +pass validation and lie within sensible ranges, but this grid search does not empirically +distinguish them from the current defaults, so `RecallConfig::default()` was left unchanged. + +## Top 10 by recall@10 + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +| 3 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0 | +| 4 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.75/0.25) decay=hyperbolic hl=30.0 | +| 5 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.5/0.5) decay=hyperbolic hl=60.0 | +| 6 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.25/0.75) decay=none hl=14.0 | +| 10 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(1.0/0.0) decay=exponential hl=30.0 | +| 11 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.75/0.25) decay=exponential hl=60.0 | +| 12 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.5/0.5) decay=hyperbolic hl=14.0 | +| 13 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.25/0.75) decay=hyperbolic hl=30.0 | +| 18 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.75/0.25) decay=exponential hl=14.0 | +| 19 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.5/0.5) decay=exponential hl=30.0 | + +## Top 10 by MRR + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +| 3 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0 | +| 4 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.75/0.25) decay=hyperbolic hl=30.0 | +| 5 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.5/0.5) decay=hyperbolic hl=60.0 | +|
6 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.25/0.75) decay=none hl=14.0 | +| 10 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(1.0/0.0) decay=exponential hl=30.0 | +| 11 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.75/0.25) decay=exponential hl=60.0 | +| 12 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.5/0.5) decay=hyperbolic hl=14.0 | +| 13 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.25/0.75) decay=hyperbolic hl=30.0 | +| 18 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.75/0.25) decay=exponential hl=14.0 | +| 19 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.5/0.5) decay=exponential hl=30.0 | diff --git a/tests/khive-contract/tune/__init__.py b/tests/khive-contract/tune/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/khive-contract/tune/__main__.py b/tests/khive-contract/tune/__main__.py new file mode 100644 index 0000000..2cc20c9 --- /dev/null +++ b/tests/khive-contract/tune/__main__.py @@ -0,0 +1,3 @@ +from tune.grid_search import main + +main() diff --git a/tests/khive-contract/tune/grid_search.py b/tests/khive-contract/tune/grid_search.py new file mode 100644 index 0000000..b7e64a3 --- /dev/null +++ b/tests/khive-contract/tune/grid_search.py @@ -0,0 +1,517 @@ +"""Param-tuning grid search for khive recall configuration. + +Runs a FTS-only grid over scoring weights, candidate pool sizes, fusion +strategies, decay models, and temporal half-life parameters. One MCP session +is created and the corpus is loaded once; config is varied per recall() call. + +TODO: Add --with-embed flag for embedding-enabled grid over both + all-minilm-l6-v2 and paraphrase-multilingual-minilm-l12-v2 models. + Requires no_embed=False and KHIVE_ADDITIONAL_EMBEDDING_MODELS=paraphrase. 
+""" + +from __future__ import annotations + +import argparse +import json +import time +from datetime import date +from pathlib import Path +from typing import Any + +from khive_contract.client import KhiveMcpSession + +RANDOM_SEED = 42 + +_HERE = Path(__file__).parent +DEFAULT_CORPUS = _HERE.parent / "fixtures" / "memories_corpus.json" +DEFAULT_OUTPUT = _HERE + + +# --------------------------------------------------------------------------- +# Data loading +# --------------------------------------------------------------------------- + + +def load_corpus(path: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + """Load memories and eval_queries from a corpus JSON file.""" + data = json.loads(path.read_text(encoding="utf-8")) + memories: list[dict[str, Any]] = data["memories"] + eval_queries: list[dict[str, Any]] = data["eval_queries"] + return memories, eval_queries + + +# --------------------------------------------------------------------------- +# Session setup +# --------------------------------------------------------------------------- + + +def setup_session( + memories: list[dict[str, Any]], db: str = ":memory:" +) -> tuple[KhiveMcpSession, dict[int, str]]: + """Open a KhiveMcpSession and load all corpus memories via remember(). + + The returned session is already entered (via __enter__). The caller MUST + call session.close() when done, or use a try/finally block. + + Returns: + (session, note_id_map) where note_id_map[corpus_index] = note_id string. + + Raises: + RuntimeError: if remember() returns no note_id for a memory. If loading + fails for any reason, the session is closed before the exception propagates.
+ """ + session = KhiveMcpSession( + packs=("kg", "memory"), + db=db, + no_embed=True, + log="error", + ) + session.__enter__() + + note_id_map: dict[int, str] = {} + total = len(memories) + print(f"Loading {total} memories into session...") + t_load_start = time.perf_counter() + + try: + for i, mem in enumerate(memories): + args: dict[str, Any] = { + "content": mem["content"], + "importance": mem["importance"], + "decay_factor": mem["decay_factor"], + "memory_type": mem["memory_type"], + } + if mem.get("tags"): + args["tags"] = mem["tags"] + + result = session.verb("remember", args) + note_id = result.get("note_id") or result.get("id") if result else None + if not note_id: + raise RuntimeError(f"remember() returned no note_id for memory {i}: {result!r}") + note_id_map[i] = str(note_id) + + if (i + 1) % 25 == 0: + elapsed = time.perf_counter() - t_load_start + print(f" Loaded {i + 1}/{total} memories ({elapsed:.1f}s)") + except BaseException: + # The caller never receives the session on failure, so close it here + # instead of leaking the already-entered session. + session.close() + raise + + elapsed = time.perf_counter() - t_load_start + print(f"Corpus loaded in {elapsed:.1f}s. Beginning grid search...") + return session, note_id_map + + +# --------------------------------------------------------------------------- +# Metric evaluation +# --------------------------------------------------------------------------- + + +def evaluate_config( + session: KhiveMcpSession, + config_dict: dict[str, Any], + eval_queries: list[dict[str, Any]], + note_id_map: dict[int, str], +) -> dict[str, float]: + """Evaluate one RecallConfig against all eval queries.
+ + Returns: + {"recall_at_10": float, "mrr": float, "mean_latency_ms": float} + """ + recalls: list[float] = [] + mrrs: list[float] = [] + latencies: list[float] = [] + + for eq in eval_queries: + query: str = eq["query"] + relevant_indices: list[int] = eq["relevant_indices"] + relevant_note_ids = {note_id_map[i] for i in relevant_indices if i in note_id_map} + + t0 = time.perf_counter() + try: + hits = session.verb( + "recall", + {"query": query, "limit": 10, "config": config_dict}, + ) + except Exception: + hits = [] + latency_ms = (time.perf_counter() - t0) * 1000.0 + latencies.append(latency_ms) + + retrieved_ids: list[str] = [] + if isinstance(hits, list): + for h in hits: + nid = h.get("note_id") or h.get("id") if isinstance(h, dict) else None + if nid: + retrieved_ids.append(str(nid)) + + # recall@10 + retrieved_set = set(retrieved_ids) + if relevant_note_ids: + r_at_10 = len(relevant_note_ids & retrieved_set) / len(relevant_note_ids) + else: + r_at_10 = 0.0 + recalls.append(r_at_10) + + # MRR — reciprocal rank of first relevant hit + mrr = 0.0 + for rank, nid in enumerate(retrieved_ids, 1): + if nid in relevant_note_ids: + mrr = 1.0 / rank + break + mrrs.append(mrr) + + n = len(eval_queries) + return { + "recall_at_10": sum(recalls) / n if n else 0.0, + "mrr": sum(mrrs) / n if n else 0.0, + "mean_latency_ms": sum(latencies) / n if n else 0.0, + } + + +# --------------------------------------------------------------------------- +# Grid generation +# --------------------------------------------------------------------------- + + +def generate_grid(quick: bool = False) -> list[dict[str, Any]]: + """Generate the FTS-only RecallConfig parameter grid. + + Full grid: 4 × 4 × 8 × 3 × 3 = 1152 configs + Quick grid: every 10th config (deterministic sort) ≈ 116 configs + + Weight triples are normalized so relevance+importance+temporal = 1.0. + Weighted fusion uses [text_weight, vector_weight] where alpha=vector_weight. 
+ In FTS-only mode (no_embed=True) all vector results are empty, so + weighted configs with high vector alpha will score poorly — this is + expected and meaningful for the grid. + """ + weight_triples = [ + # (relevance_weight, importance_weight, temporal_weight) + (0.70, 0.20, 0.10), # default + (0.60, 0.30, 0.10), + (0.60, 0.20, 0.20), + (0.80, 0.10, 0.10), + ] + + candidate_pools = [ + # (candidate_multiplier, candidate_limit) + (10, None), + (20, None), # default + (40, None), + (20, 100), + ] + + # 3 RRF + 5 weighted = 8 fusion configs + fusion_configs: list[dict[str, Any]] = [ + {"rrf": {"k": 20}}, + {"rrf": {"k": 60}}, # default + {"rrf": {"k": 100}}, + {"weighted": {"weights": [1.0, 0.0]}}, # text-only + {"weighted": {"weights": [0.75, 0.25]}}, + {"weighted": {"weights": [0.5, 0.5]}}, + {"weighted": {"weights": [0.25, 0.75]}}, + {"weighted": {"weights": [0.0, 1.0]}}, # vector-only + ] + + decay_models = ["exponential", "hyperbolic", "none"] + half_lives = [14.0, 30.0, 60.0] + + configs: list[dict[str, Any]] = [] + for rw, iw, tw in weight_triples: + for cm, cl in candidate_pools: + for fuse in fusion_configs: + for decay in decay_models: + for hl in half_lives: + cfg: dict[str, Any] = { + "relevance_weight": rw, + "importance_weight": iw, + "temporal_weight": tw, + "candidate_multiplier": cm, + "fuse_strategy": fuse, + "decay_model": decay, + "temporal_half_life_days": hl, + "min_score": 0.0, + "min_salience": 0.0, + } + if cl is not None: + cfg["candidate_limit"] = cl + configs.append(cfg) + + if quick: + configs = configs[::10] + + return configs + + +# --------------------------------------------------------------------------- +# Grid execution +# --------------------------------------------------------------------------- + + +def run_grid( + session: KhiveMcpSession, + grid: list[dict[str, Any]], + eval_queries: list[dict[str, Any]], + note_id_map: dict[int, str], +) -> list[dict[str, Any]]: + """Run evaluate_config for every config in the grid. 
+ + MCP is single-threaded stdio, so iteration is sequential. + Prints progress every 100 configs. + + Returns: + List of result dicts: {"config_index", "config", "recall_at_10", "mrr", "mean_latency_ms"} + """ + results: list[dict[str, Any]] = [] + total = len(grid) + + for i, config in enumerate(grid): + if i % 100 == 0: + print(f" [{i}/{total}] config {i}...") + metrics = evaluate_config(session, config, eval_queries, note_id_map) + results.append( + { + "config_index": i, + "config": config, + **metrics, + } + ) + + return results + + +# --------------------------------------------------------------------------- +# Result writing +# --------------------------------------------------------------------------- + + +def _fuse_to_toml(fuse: dict[str, Any] | str) -> str: + """Render a fuse_strategy value as a TOML inline table or string.""" + if isinstance(fuse, str): + return json.dumps(fuse) # JSON string escaping is also valid for a TOML basic string + if "rrf" in fuse: + k = fuse["rrf"]["k"] + return f"{{rrf = {{k = {k}}}}}" + if "weighted" in fuse: + weights = fuse["weighted"]["weights"] + return f"{{weighted = {{weights = [{weights[0]}, {weights[1]}]}}}}" + # fallback: emit the JSON encoding of the value as one escaped TOML basic string + return json.dumps(json.dumps(fuse)) + + +def write_results( + results: list[dict[str, Any]], + output_dir: Path, + *, + t_total_seconds: float, + default_config_metrics: dict[str, float] | None = None, +) -> None: + """Write results.json, tuned-config.toml, and REPORT.md to output_dir.""" + output_dir.mkdir(parents=True, exist_ok=True) + t_total = t_total_seconds + today = date.today().isoformat() + + # --- results.json --- + (output_dir / "results.json").write_text(json.dumps(results, indent=2)) + print(f"Wrote {output_dir / 'results.json'} ({len(results)} configs)") + + # --- rank by recall@10 then MRR --- + sorted_by_recall = sorted( + results, key=lambda r: (r["recall_at_10"], r["mrr"]), reverse=True + ) + sorted_by_mrr = sorted( + results, key=lambda r: (r["mrr"], r["recall_at_10"]), reverse=True + ) + winner =
sorted_by_recall[0] + cfg = winner["config"] + + # --- tuned-config.toml --- + fuse_toml = _fuse_to_toml(cfg["fuse_strategy"]) + decay_model_str = cfg["decay_model"] if isinstance(cfg["decay_model"], str) else json.dumps(cfg["decay_model"]) + cl_line = ( + f"candidate_limit = {cfg['candidate_limit']}" + if cfg.get("candidate_limit") is not None + else "# candidate_limit = null (use multiplier only)" + ) + toml_content = f"""\ +# Winning config from khive recall param-tuning grid search +# run_date = "{today}" +# recall_at_10 = {winner['recall_at_10']:.4f} +# mrr = {winner['mrr']:.4f} +# mean_latency_ms = {winner['mean_latency_ms']:.2f} + +[recall] +relevance_weight = {cfg['relevance_weight']} +importance_weight = {cfg['importance_weight']} +temporal_weight = {cfg['temporal_weight']} +temporal_half_life_days = {cfg['temporal_half_life_days']} +decay_model = "{decay_model_str}" +candidate_multiplier = {cfg['candidate_multiplier']} +{cl_line} +fuse_strategy = {fuse_toml} +min_score = {cfg['min_score']} +min_salience = {cfg['min_salience']} +""" + (output_dir / "tuned-config.toml").write_text(toml_content) + print(f"Wrote {output_dir / 'tuned-config.toml'}") + + # --- REPORT.md --- + top10_recall = sorted_by_recall[:10] + top10_mrr = sorted_by_mrr[:10] + + def _cfg_summary(r: dict[str, Any]) -> str: + c = r["config"] + fuse = c["fuse_strategy"] + if isinstance(fuse, dict) and "rrf" in fuse: + fuse_str = f"rrf(k={fuse['rrf']['k']})" + elif isinstance(fuse, dict) and "weighted" in fuse: + w = fuse["weighted"]["weights"] + fuse_str = f"weighted({w[0]}/{w[1]})" + else: + fuse_str = str(fuse) + decay_str = c["decay_model"] if isinstance(c["decay_model"], str) else json.dumps(c["decay_model"]) + return ( + f"rel={c['relevance_weight']} imp={c['importance_weight']} " + f"tmp={c['temporal_weight']} cand={c['candidate_multiplier']} " + f"fuse={fuse_str} decay={decay_str} hl={c['temporal_half_life_days']}" + ) + + def _row(r: dict[str, Any]) -> str: + return ( + f"| 
{r['config_index']:4d} | {r['recall_at_10']:.4f} | {r['mrr']:.4f} " + f"| {r['mean_latency_ms']:.1f}ms | {_cfg_summary(r)} |" + ) + + top10_recall_rows = "\n".join(_row(r) for r in top10_recall) + top10_mrr_rows = "\n".join(_row(r) for r in top10_mrr) + + default_section = "" + if default_config_metrics: + default_section = f""" +## Default vs Tuned Comparison + +| Metric | Default config | Tuned config | Delta | +|--------|---------------|-------------|-------| +| recall@10 | {default_config_metrics['recall_at_10']:.4f} | {winner['recall_at_10']:.4f} | {winner['recall_at_10'] - default_config_metrics['recall_at_10']:+.4f} | +| MRR | {default_config_metrics['mrr']:.4f} | {winner['mrr']:.4f} | {winner['mrr'] - default_config_metrics['mrr']:+.4f} | +| mean latency | {default_config_metrics['mean_latency_ms']:.1f}ms | {winner['mean_latency_ms']:.1f}ms | {winner['mean_latency_ms'] - default_config_metrics['mean_latency_ms']:+.1f}ms | + +Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 +""" + + report = f"""\ +# Param-Tuning Grid Search Report + +- **Date**: {today} +- **Grid size**: {len(results)} configs +- **Eval queries**: 20 +- **Total runtime**: {t_total:.1f}s +- **Mode**: FTS-only (no_embed=True) + +## Winning Config (highest recall@10) + +| Metric | Value | +|--------|-------| +| recall@10 | {winner['recall_at_10']:.4f} | +| MRR | {winner['mrr']:.4f} | +| mean latency | {winner['mean_latency_ms']:.1f}ms | +| config_index | {winner['config_index']} | + +Parameters: `{_cfg_summary(winner)}` +{default_section} +## Top 10 by recall@10 + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +{top10_recall_rows} + +## Top 10 by MRR + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +{top10_mrr_rows} +""" + (output_dir / "REPORT.md").write_text(report) + print(f"Wrote {output_dir / 'REPORT.md'}") + + +# 
--------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +_DEFAULT_CONFIG = { + "relevance_weight": 0.70, + "importance_weight": 0.20, + "temporal_weight": 0.10, + "candidate_multiplier": 20, + "fuse_strategy": {"rrf": {"k": 60}}, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, +} + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Grid search for khive recall config parameters (FTS-only mode)." + ) + parser.add_argument( + "--quick", + action="store_true", + help="Sample every 10th config for a fast smoke test (~10x faster).", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT, + help="Directory to write results.json, tuned-config.toml, REPORT.md.", + ) + parser.add_argument( + "--corpus", + type=Path, + default=DEFAULT_CORPUS, + help="Path to memories_corpus.json fixture.", + ) + args = parser.parse_args() + + corpus_path: Path = args.corpus + output_dir: Path = args.output_dir + + if not corpus_path.exists(): + raise FileNotFoundError(f"Corpus not found: {corpus_path}") + + print(f"Loading corpus from {corpus_path}") + memories, eval_queries = load_corpus(corpus_path) + print(f"Corpus: {len(memories)} memories, {len(eval_queries)} eval queries") + + grid = generate_grid(quick=args.quick) + print(f"Grid: {len(grid)} configs (quick={args.quick})") + + t_start = time.perf_counter() + session, note_id_map = setup_session(memories) + try: + # Evaluate default config for the comparison table + default_metrics = evaluate_config(session, _DEFAULT_CONFIG, eval_queries, note_id_map) + print( + f"Default config: recall@10={default_metrics['recall_at_10']:.4f} " + f"mrr={default_metrics['mrr']:.4f}" + ) + + results = run_grid(session, grid, eval_queries, note_id_map) + finally: + session.close() + + t_elapsed = time.perf_counter() - t_start 
+ print(f"Grid search complete in {t_elapsed:.1f}s") + + write_results( + results, + output_dir, + t_total_seconds=t_elapsed, + default_config_metrics=default_metrics, + ) + + best = max(results, key=lambda r: (r["recall_at_10"], r["mrr"])) + print( + f"\nBest config: recall@10={best['recall_at_10']:.4f} mrr={best['mrr']:.4f} " + f"(index {best['config_index']})" + ) + print(f"Results written to {output_dir}") + + +if __name__ == "__main__": + main() diff --git a/tests/khive-contract/tune/results.json b/tests/khive-contract/tune/results.json new file mode 100644 index 0000000..d6df820 --- /dev/null +++ b/tests/khive-contract/tune/results.json @@ -0,0 +1,2680 @@ +[ + { + "config_index": 0, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28148540113761555 + }, + { + "config_index": 1, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2686937492399011 + }, + { + "config_index": 2, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.29486264975275844 + }, + { + "config_index": 3, + "config": { + "relevance_weight": 0.7, + 
"importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2862915989680914 + }, + { + "config_index": 4, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2918167483585421 + }, + { + "config_index": 5, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28529789960884955 + }, + { + "config_index": 6, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2831625501130475 + }, + { + "config_index": 7, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + 
"min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2803040999424411 + }, + { + "config_index": 8, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2848207499482669 + }, + { + "config_index": 9, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27973329997621477 + }, + { + "config_index": 10, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2801396494760411 + }, + { + "config_index": 11, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28181665038573556 + }, + { + "config_index": 12, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + 
"fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.29417920086416416 + }, + { + "config_index": 13, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2827333999448456 + }, + { + "config_index": 14, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28087909959140234 + }, + { + "config_index": 15, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.3020208008820191 + }, + { + "config_index": 16, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28055835064151324 + }, + { + 
"config_index": 17, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28160629990452435 + }, + { + "config_index": 18, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.29320840003492776 + }, + { + "config_index": 19, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2866227991034975 + }, + { + "config_index": 20, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2897542504797457 + }, + { + "config_index": 21, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + 
"temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.29314370003703516 + }, + { + "config_index": 22, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2905041001213249 + }, + { + "config_index": 23, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2886353995563695 + }, + { + "config_index": 24, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28937284951098263 + }, + { + "config_index": 25, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2861790999304503 + }, + { + "config_index": 26, + "config": { + 
"relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.31045204887050204 + }, + { + "config_index": 27, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2888021495891735 + }, + { + "config_index": 28, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2846291503374232 + }, + { + "config_index": 29, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27716039949154947 + }, + { + "config_index": 30, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": 
"hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2801395508868154 + }, + { + "config_index": 31, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27901260036742315 + }, + { + "config_index": 32, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28072700115444604 + }, + { + "config_index": 33, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28003539991914295 + }, + { + "config_index": 34, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2829062992532272 + }, + { + "config_index": 35, + "config": { + "relevance_weight": 0.6, + 
"importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2766604502539849 + }, + { + "config_index": 36, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2769689010165166 + }, + { + "config_index": 37, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27942089982389007 + }, + { + "config_index": 38, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27995829987048637 + }, + { + "config_index": 39, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + 
"mrr": 0.95, + "mean_latency_ms": 0.280987650694442 + }, + { + "config_index": 40, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2805270501994528 + }, + { + "config_index": 41, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2790415495837806 + }, + { + "config_index": 42, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2858790994650917 + }, + { + "config_index": 43, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27572910039452836 + }, + { + "config_index": 44, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + 
"k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28704375072265975 + }, + { + "config_index": 45, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.281006250588689 + }, + { + "config_index": 46, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2943519994005328 + }, + { + "config_index": 47, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28374790090310853 + }, + { + "config_index": 48, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2794124502543127 + }, + { + "config_index": 49, + 
"config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28600204932445195 + }, + { + "config_index": 50, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2889624494855525 + }, + { + "config_index": 51, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2864793004846433 + }, + { + "config_index": 52, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27940414984186646 + }, + { + "config_index": 53, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + 
"min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2789604495774256 + }, + { + "config_index": 54, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2787499499390833 + }, + { + "config_index": 55, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2868397506972542 + }, + { + "config_index": 56, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.284945898965816 + }, + { + "config_index": 57, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + 
"mean_latency_ms": 0.2812874005030608 + }, + { + "config_index": 58, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28095219931856263 + }, + { + "config_index": 59, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2758167509455234 + }, + { + "config_index": 60, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2824979506840464 + }, + { + "config_index": 61, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.282977097958792 + }, + { + "config_index": 62, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 
60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2763854499789886 + }, + { + "config_index": 63, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.27764179903897457 + }, + { + "config_index": 64, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2787354511383455 + }, + { + "config_index": 65, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2802791514113778 + }, + { + "config_index": 66, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2792020015476737 + }, + { + "config_index": 67, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + 
"candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27800425050372723 + }, + { + "config_index": 68, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2789042016956955 + }, + { + "config_index": 69, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2812332495523151 + }, + { + "config_index": 70, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2767645495623583 + }, + { + "config_index": 71, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + 
"mean_latency_ms": 0.27748339998652227 + }, + { + "config_index": 72, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2761915504379431 + }, + { + "config_index": 73, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2803582996421028 + }, + { + "config_index": 74, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27802289987448603 + }, + { + "config_index": 75, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2767853995464975 + }, + { + "config_index": 76, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + 
"temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28124175005359575 + }, + { + "config_index": 77, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2837229010765441 + }, + { + "config_index": 78, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2874540507036727 + }, + { + "config_index": 79, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2787061999697471 + }, + { + "config_index": 80, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2805125004670117 + }, + { + "config_index": 81, + "config": { + "relevance_weight": 0.6, + 
"importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2797540499159368 + }, + { + "config_index": 82, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2792020997731015 + }, + { + "config_index": 83, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2798353001708165 + }, + { + "config_index": 84, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2828772005159408 + }, + { + "config_index": 85, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + 
} + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.279381150539848 + }, + { + "config_index": 86, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28124165000917856 + }, + { + "config_index": 87, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27987510002276395 + }, + { + "config_index": 88, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2800395010126522 + }, + { + "config_index": 89, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2810250996844843 + }, + { + "config_index": 90, + "config": { + "relevance_weight": 
0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28223335029906593 + }, + { + "config_index": 91, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28449174933484755 + }, + { + "config_index": 92, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28110419916629326 + }, + { + "config_index": 93, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27862714960065205 + }, + { + "config_index": 94, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + 
"min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2819332996295998 + }, + { + "config_index": 95, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28068960054952186 + }, + { + "config_index": 96, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2806042510201223 + }, + { + "config_index": 97, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.27994805022899527 + }, + { + "config_index": 98, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2773583990347106 + }, + { + "config_index": 99, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + 
"weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28961035059182905 + }, + { + "config_index": 100, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28000009879178833 + }, + { + "config_index": 101, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28202285029692575 + }, + { + "config_index": 102, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28163114984636195 + }, + { + "config_index": 103, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28272290037421044 + }, + { + "config_index": 104, + 
"config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.29119380087649915 + }, + { + "config_index": 105, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28479799948399886 + }, + { + "config_index": 106, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28276649973122403 + }, + { + "config_index": 107, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28361035001580603 + }, + { + "config_index": 108, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + 
"min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28614999901037663 + }, + { + "config_index": 109, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2842709007381927 + }, + { + "config_index": 110, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28580209873325657 + }, + { + "config_index": 111, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2826500996889081 + }, + { + "config_index": 112, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2826354011631338 + }, + { + 
"config_index": 113, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2808332999848062 + }, + { + "config_index": 114, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28887504995509516 + }, + { + "config_index": 115, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2837999494659016 + } +] \ No newline at end of file diff --git a/tests/khive-contract/tune/tuned-config.toml b/tests/khive-contract/tune/tuned-config.toml new file mode 100644 index 0000000..23e8489 --- /dev/null +++ b/tests/khive-contract/tune/tuned-config.toml @@ -0,0 +1,17 @@ +# Winning config from khive recall param-tuning grid search +# run_date = "2026-05-25" +# recall_at_10 = 0.9333 +# mrr = 0.9500 +# mean_latency_ms = 0.29 + +[recall] +relevance_weight = 0.7 +importance_weight = 0.2 +temporal_weight = 0.1 +temporal_half_life_days = 14.0 +decay_model = "hyperbolic" +candidate_multiplier = 10 +# candidate_limit = null 
(use multiplier only) +fuse_strategy = {weighted = {weights = [1.0, 0.0]}} +min_score = 0.0 +min_salience = 0.0