From 929ee40dafaea5a94c6fdacee06ffe7de2ba0449 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 17:39:40 +0000 Subject: [PATCH 01/10] =?UTF-8?q?fix(brain):=20overhaul=20gist=20quality?= =?UTF-8?q?=20=E2=80=94=20deep=20research=20loop,=20strict=20novelty=20gat?= =?UTF-8?q?es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problems fixed: - Every gist was "X shows weak co-occurrence with Y (confidence: 50%)" - Same generic cluster labels (debug, architecture, geopolitics) recycled - Novelty thresholds too low (2 inferences, 100 evidence, 0.008 strange loop) - Rate limit too permissive (4 hours = 6 gists/day of noise) - No content-level dedup Changes: - Raise novelty thresholds: 5 inferences, 500 evidence, 0.05 strange loop - Add MIN_INFERENCE_CONFIDENCE (60%) — filter out weak signals before publishing - Add strong_inferences() / strong_propositions() quality filters - Raise cross-domain similarity threshold from 0.3 to 0.45 at source - Raise predicate thresholds (may_influence: 0.75, associated_with: 0.55) - Rate limit: 24 hours between gists (was 4 hours) - Content-based dedup (category + dominant inference, not just title) - 3-pass research loop: (1) Gemini grounded research on topics, (2) brain memory search for internal context, (3) Gemini synthesis - Deleted all 45 old repetitive gists Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/gist.rs | 411 +++++++++++++----- crates/mcp-brain-server/src/symbolic.rs | 10 +- .../ruvllm/npm/darwin-x64/package.json | 2 +- .../ruvllm/npm/linux-x64-gnu/package.json | 2 +- .../ruvllm/npm/win32-x64-msvc/package.json | 2 +- npm/packages/ruvllm/package.json | 2 +- npm/packages/sona/npm/darwin-x64/package.json | 19 +- .../sona/npm/linux-x64-gnu/package.json | 2 +- .../sona/npm/win32-x64-msvc/package.json | 19 +- npm/packages/sona/package.json | 4 +- 10 files changed, 345 insertions(+), 128 deletions(-) diff --git a/crates/mcp-brain-server/src/gist.rs b/crates/mcp-brain-server/src/gist.rs index 0e7fe7a7a..922ae77c0 100644 --- a/crates/mcp-brain-server/src/gist.rs +++ b/crates/mcp-brain-server/src/gist.rs @@ -15,20 +15,23 @@ use parking_lot::Mutex; use serde::{Deserialize, Serialize}; // ── Novelty thresholds ── -// Tuned for current brain state (~2600 memories, 10 categories, 11 inference rules). -// These will publish roughly once per day when data is flowing, less when static. -/// Minimum new inferences: forward-chained claims not in any single memory -const MIN_NEW_INFERENCES: usize = 2; -/// Minimum evidence observations -const MIN_EVIDENCE: usize = 100; -/// Minimum strange loop quality score -const MIN_STRANGE_LOOP_SCORE: f32 = 0.008; +// Tuned aggressively: only publish genuinely novel, high-confidence findings. +// Previous thresholds (2/100/0.008) allowed floods of "weak co-occurrence" noise. +// These gates should yield ~1 gist per day at most when substantive new data arrives. +/// Minimum new inferences: must derive non-trivial forward-chained claims +const MIN_NEW_INFERENCES: usize = 5; +/// Minimum evidence observations — need enough data for statistical significance +const MIN_EVIDENCE: usize = 500; +/// Minimum strange loop quality score — higher = more self-aware reasoning +const MIN_STRANGE_LOOP_SCORE: f32 = 0.05; /// Minimum propositions extracted in this cycle -const MIN_PROPOSITIONS: usize = 5; +const MIN_PROPOSITIONS: usize = 10; /// Minimum SONA patterns — 0 means SONA isn't required (it needs trajectory data) const MIN_SONA_PATTERNS: usize = 0; /// Minimum Pareto front growth — evolution must have found new solutions -const MIN_PARETO_GROWTH: usize = 1; +const MIN_PARETO_GROWTH: usize = 2; +/// Minimum confidence for ANY inference to be included in a discovery +const MIN_INFERENCE_CONFIDENCE: f64 = 0.60; /// A discovery worthy of publishing. /// @@ -74,14 +77,61 @@ pub struct Discovery { } impl Discovery { + /// Filter out weak/generic inferences, keeping only substantive ones. + /// Returns the strong inferences that survive the quality gate. + pub fn strong_inferences(&self) -> Vec<&str> { + self.inferences.iter() + .filter(|inf| { + // Reject generic "weak co-occurrence" noise + let lower = inf.to_lowercase(); + if lower.contains("shows weak co-occurrence") { + return false; + } + // Reject inferences with generic cluster IDs as subjects + if lower.starts_with("cluster_") { + return false; + } + // Require minimum confidence (parse from explanation string) + if let Some(pct_start) = lower.find("confidence: ") { + let rest = &lower[pct_start + 12..]; + if let Some(pct_end) = rest.find('%') { + if let Ok(pct) = rest[..pct_end].parse::() { + return pct >= MIN_INFERENCE_CONFIDENCE * 100.0; + } + } + } + // If we can't parse confidence, keep it only if it has substance + !lower.contains("weak") && inf.len() > 30 + }) + .map(|s| s.as_str()) + .collect() + } + + /// Filter propositions to only those with confidence >= threshold. + pub fn strong_propositions(&self) -> Vec<&(String, String, String, f64)> { + self.propositions.iter() + .filter(|(subj, pred, _obj, conf)| { + // Skip generic cluster labels + if subj.starts_with("cluster_") { return false; } + // Skip "co_occurs_with" at low confidence + if pred == "co_occurs_with" && *conf < 0.55 { return false; } + *conf >= MIN_INFERENCE_CONFIDENCE + }) + .collect() + } + /// Check if this discovery meets the novelty bar for publishing. pub fn is_publishable(&self) -> bool { + let strong = self.strong_inferences(); + let strong_props = self.strong_propositions(); + self.new_inferences >= MIN_NEW_INFERENCES && self.evidence_count >= MIN_EVIDENCE && self.strange_loop_score >= MIN_STRANGE_LOOP_SCORE && self.propositions_extracted >= MIN_PROPOSITIONS && self.pareto_growth >= MIN_PARETO_GROWTH - && !self.inferences.is_empty() + && strong.len() >= 2 // Must have at least 2 non-trivial inferences + && strong_props.len() >= 3 // Must have at least 3 substantive propositions } /// Explain why a discovery was or wasn't published. @@ -142,14 +192,14 @@ impl GistPublisher { Some(Self { token, last_publish: Mutex::new(None), - min_interval: Duration::from_secs(14400), // 4 hour minimum between gists + min_interval: Duration::from_secs(86400), // 24 hour minimum between gists published_count: Mutex::new(0), published_titles: Mutex::new(Vec::new()), }) } - /// Check if we can publish (rate limit + dedup) - pub fn can_publish(&self, title: &str) -> bool { + /// Check if we can publish (rate limit + content dedup) + pub fn can_publish(&self, discovery: &Discovery) -> bool { // Rate limit let last = self.last_publish.lock(); if let Some(t) = *last { @@ -157,9 +207,11 @@ impl GistPublisher { return false; } } - // Dedup: don't publish same title twice + // Content dedup: don't publish if core category + dominant inference already published let titles = self.published_titles.lock(); - !titles.iter().any(|t| t == title) + let key = format!("{}:{}", discovery.category, + discovery.strong_inferences().first().unwrap_or(&"")); + !titles.iter().any(|t| t == &key || t == &discovery.title) } pub fn published_count(&self) -> u64 { @@ -178,8 +230,17 @@ impl GistPublisher { ); return Ok(None); } - if !self.can_publish(&discovery.title) { - tracing::debug!("Gist publish rate limited or duplicate title"); + if !self.can_publish(discovery) { + tracing::debug!("Gist publish rate limited or duplicate content"); + return Ok(None); + } + + // Only include strong inferences and propositions in the gist + let strong_inferences = discovery.strong_inferences(); + let strong_propositions = discovery.strong_propositions(); + + if strong_inferences.len() < 2 { + tracing::debug!("Discovery has {} strong inferences (need 2+), skipping", strong_inferences.len()); return Ok(None); } @@ -188,15 +249,16 @@ impl GistPublisher { discovery.timestamp.format("%Y%m%d-%H%M%S") ); - // Use Gemini to rewrite the raw discovery into a polished article + // Use Gemini with Google Grounding to do deep research on the discovery + // topics, then produce a substantive article with real-world context let raw_content = format_academic_gist(discovery); - let content = match rewrite_with_gemini(discovery, &raw_content).await { + let content = match research_and_write_with_gemini(discovery, &strong_inferences, &strong_propositions).await { Ok(polished) => { - tracing::info!("Gemini rewrote discovery ({} → {} chars)", raw_content.len(), polished.len()); + tracing::info!("Gemini deep research produced {} chars", polished.len()); polished } Err(e) => { - tracing::warn!("Gemini rewrite failed ({}), using raw content", e); + tracing::warn!("Gemini deep research failed ({}), using raw content", e); raw_content } }; @@ -240,9 +302,14 @@ impl GistPublisher { *self.last_publish.lock() = Some(Instant::now()); *self.published_count.lock() += 1; - self.published_titles - .lock() - .push(discovery.title.clone()); + { + let mut titles = self.published_titles.lock(); + titles.push(discovery.title.clone()); + // Also store the content dedup key + let key = format!("{}:{}", discovery.category, + discovery.strong_inferences().first().unwrap_or(&"")); + titles.push(key); + } tracing::info!( "Published discovery gist: {} -> {} (novelty: {})", @@ -378,105 +445,132 @@ curl -H "Authorization: Bearer KEY" "https://pi.ruv.io/v1/cognitive/status" ) } -/// Use Gemini to rewrite a raw discovery into a polished, human-readable article. -/// Falls back to raw content if Gemini is unavailable. -async fn rewrite_with_gemini(discovery: &Discovery, raw_content: &str) -> Result { +/// Use Gemini with Google Grounding to conduct deep research on discovery topics, +/// then produce a substantive article with real-world context, recent papers, +/// and specific domain knowledge — not just cluster co-occurrence summaries. +async fn research_and_write_with_gemini( + discovery: &Discovery, + strong_inferences: &[&str], + strong_propositions: &[&(String, String, String, f64)], +) -> Result { let api_key = std::env::var("GEMINI_API_KEY") .map_err(|_| "GEMINI_API_KEY not set".to_string())?; let model = std::env::var("GEMINI_MODEL") .unwrap_or_else(|_| "gemini-2.5-flash".to_string()); - // Build a concise summary of what was discovered for the prompt - let inferences_summary = discovery.inferences.iter() - .take(5) + // Build summaries from STRONG signals only (filtered out weak co-occurrences) + let inferences_summary = strong_inferences.iter() + .take(8) .map(|i| format!("- {}", i)) .collect::>() .join("\n"); - let propositions_summary = discovery.propositions.iter() + let propositions_summary = strong_propositions.iter() .take(10) - .map(|(s, p, o, c)| format!("- {} {} {} (confidence: {:.2})", s, p, o, c)) + .map(|(s, p, o, c)| format!("- {} {} {} (confidence: {:.0}%)", s, p, o, c * 100.0)) .collect::>() .join("\n"); let findings_summary = discovery.findings.iter() + .filter(|f| !f.to_lowercase().contains("weak co-occurrence")) .take(5) .map(|f| format!("- {}", f)) .collect::>() .join("\n"); - let prompt = format!( -r#"You are the editorial voice of the π Brain — an autonomous AI knowledge system at pi.ruv.io. + // Extract the key domain topics for grounding research + let topics: Vec<&str> = strong_propositions.iter() + .flat_map(|(s, _p, o, _c)| vec![s.as_str(), o.as_str()]) + .filter(|t| !t.starts_with("cluster_") && !t.is_empty()) + .collect::>() + .into_iter() + .take(5) + .collect(); -Rewrite the following raw discovery data into a polished academic-style GitHub Gist article. The article must be: + let prompt = format!( +r#"You are a research scientist at the π Brain autonomous AI knowledge system (pi.ruv.io). -1. **Accessible**: Start with a plain-language introduction that anyone can understand — what was discovered and why it matters -2. **Technical**: Include the formal symbolic reasoning chain, propositions, and inference rules -3. **Verifiable**: Include the witness chain hashes and API links for independent verification -4. **Honest**: If the confidence is low or the finding is speculative, say so clearly +The π Brain has identified the following substantive cross-domain connections. Your job is to: -Structure: -- Title (compelling, specific — not generic) -- Plain-language summary (2-3 sentences, no jargon) -- Key discoveries (what was actually found, in human terms) -- Technical details (propositions, inference chains, confidence scores) -- Verification (witness hashes, API endpoints) -- Citation block +1. **Use Google Search grounding** to find REAL recent papers, news, or data that validate or contextualize these connections +2. Write a deep research article that connects the brain's autonomous findings to real-world knowledge +3. Provide genuinely novel analysis — not just "X co-occurs with Y" -Raw data: +## Brain's Filtered Findings (only high-confidence signals) -**Inferences derived:** +**Strong inferences (>60% confidence):** {inferences} -**Propositions extracted:** +**Strong propositions:** {propositions} -**Cross-domain findings:** +**Cross-domain insights:** {findings} -**Self-reflection:** -{reflection} +**Domain topics to research:** {topics} + +## Research Instructions -**Stats:** {evidence} observations, {n_inferences} inferences, {n_props} propositions, strange loop score {sl:.4}, {sona} SONA patterns +Use Google Search to find: +- Recent academic papers (2024-2026) related to these domain intersections +- Real-world events or data that support or contradict these findings +- Novel connections that the brain may have missed +- Quantitative data points (statistics, benchmarks, metrics) -**Witness hashes:** {witnesses} +## Article Structure -**Witness memory IDs:** {memory_ids} +Write the article as: -CRITICAL rules for honest scientific communication: -- Use the ACTUAL content from the findings and inferences above — don't invent facts -- NEVER use the word "causes" or "causal" unless confidence >= 80% AND temporal evidence exists -- For confidence < 50%: use "shows weak co-occurrence with", "may be loosely associated with" -- For confidence 50-65%: use "is associated with", "co-occurs with" -- For confidence 65-80%: use "may influence", "appears to be linked to" -- For confidence >= 80%: use "strongly associated with", "likely influences" -- Frame findings as HYPOTHESES, not conclusions. Use "suggests", "indicates", "appears" -- Be explicit about limitations: low vote coverage, small evidence sets, no temporal validation -- The article is from the π Brain's perspective ("we identified", "our analysis suggests") -- Include a "Limitations" section that honestly states what this does NOT prove -- Include links to https://pi.ruv.io for verification -- End with a proper BibTeX citation block -- Keep it under 2000 words -- Output ONLY the markdown article, no preamble +### Title +A specific, compelling title about the actual discovery — NOT generic like "Preliminary Co-occurrence of X with Y" + +### Summary +2-3 sentences explaining what was found and why it matters to a general audience + +### Deep Analysis +For each significant finding: +- What the brain detected (the raw signal) +- What Google Search reveals about this connection in the real world +- Why this matters (practical implications) +- Confidence assessment with honest limitations + +### Real-World Context +Cite specific recent papers, events, or datasets that ground these findings. Include URLs where possible. + +### Methodology +Brief explanation of how the π Brain works: embedding-based clustering, cosine similarity, symbolic forward-chaining, and confidence-gated language + +### Limitations +Be brutally honest about what this does NOT prove + +### Verification +- Dashboard: https://pi.ruv.io +- API: https://pi.ruv.io/v1/status +- Propositions: https://pi.ruv.io/v1/propositions +- Witness hashes: {witnesses} + +**Stats:** {evidence} observations, {n_inferences} strong inferences, {n_props} propositions + +## Rules +- NEVER pad with generic text. Every paragraph must contain specific, verifiable claims. +- If grounding search returns nothing relevant, say so — don't fabricate. +- Use real paper titles, author names, publication venues. If unsure, say "reportedly" or "according to search results". +- NO "weak co-occurrence" language — that's been filtered out. Focus on the strong signals. +- Keep under 2500 words. Quality over quantity. +- Output ONLY the markdown article. Write the article now:"#, - inferences = inferences_summary, - propositions = propositions_summary, - findings = findings_summary, - reflection = discovery.self_reflection, + inferences = if inferences_summary.is_empty() { "No strong inferences survived filtering.".to_string() } else { inferences_summary }, + propositions = if propositions_summary.is_empty() { "No strong propositions survived filtering.".to_string() } else { propositions_summary }, + findings = if findings_summary.is_empty() { "No non-trivial findings.".to_string() } else { findings_summary }, + topics = topics.join(", "), evidence = discovery.evidence_count, - n_inferences = discovery.new_inferences, - n_props = discovery.propositions_extracted, - sl = discovery.strange_loop_score, - sona = discovery.sona_patterns, - witnesses = discovery.witness_hashes.iter().take(5) + n_inferences = strong_inferences.len(), + n_props = strong_propositions.len(), + witnesses = discovery.witness_hashes.iter().take(3) .map(|h| format!("`{}`", h)) .collect::>() .join(", "), - memory_ids = discovery.witness_memory_ids.iter().take(5) - .map(|id| format!("`{}`", &id[..id.len().min(8)])) - .collect::>() - .join(", "), ); let url = format!( @@ -487,14 +581,134 @@ Write the article now:"#, let grounding = std::env::var("GEMINI_GROUNDING") .unwrap_or_else(|_| "true".to_string()) == "true"; + let client = reqwest::Client::new(); + + // ── Pass 1: Grounded research on the topics ── + // Ask Gemini to research the domain topics using Google Search, returning + // structured findings we can feed back to the brain. + let research_prompt = format!( + "Research these topics using Google Search and return a structured summary \ + of the most relevant recent findings (2024-2026):\n\ + Topics: {topics}\n\ + Context: An autonomous AI knowledge system detected associations between these domains.\n\n\ + For each topic, provide:\n\ + 1. Most relevant recent paper or article (title, authors, date, URL if available)\n\ + 2. Key quantitative finding or statistic\n\ + 3. How it relates to the other topics\n\n\ + Be concise. Return ONLY factual findings, no filler. Max 800 words.", + topics = topics.join(", ") + ); + + let pass1_result = call_gemini(&client, &url, &research_prompt, grounding, 4096, 0.2).await; + let grounded_research = match pass1_result { + Ok(text) => { + tracing::info!("Pass 1 (grounded research): {} chars", text.len()); + text + } + Err(e) => { + tracing::warn!("Pass 1 grounding failed: {}", e); + String::new() + } + }; + + // ── Pass 2: Brain-guided search via pi.ruv.io ── + // Search the brain's memory for additional context related to the grounded findings. + let brain_context = if !topics.is_empty() { + let brain_url = std::env::var("BRAIN_URL") + .unwrap_or_else(|_| "https://pi.ruv.io".to_string()); + let brain_key = std::env::var("BRAIN_SYSTEM_KEY") + .or_else(|_| std::env::var("brain-api-key")) + .unwrap_or_default(); + + let mut brain_memories = Vec::new(); + for topic in &topics { + let search_url = format!( + "{}/v1/memories/search?q={}&limit=3", + brain_url, topic.replace(' ', "%20") + ); + if let Ok(resp) = client.get(&search_url) + .header("Authorization", format!("Bearer {}", brain_key)) + .send().await + { + if let Ok(json) = resp.json::().await { + if let Some(results) = json.get("results").and_then(|r| r.as_array()) { + for mem in results.iter().take(2) { + if let (Some(title), Some(content)) = ( + mem.get("title").and_then(|t| t.as_str()), + mem.get("content").and_then(|c| c.as_str()), + ) { + brain_memories.push(format!( + "- **{}**: {}", title, &content[..content.len().min(200)] + )); + } + } + } + } + } + } + if brain_memories.is_empty() { + String::new() + } else { + format!("\n## Brain Memory Context\n\n{}", brain_memories.join("\n")) + } + } else { + String::new() + }; + + // ── Pass 3: Final synthesis — combine brain signals + grounded research ── + let synthesis_prompt = format!( + "{original_prompt}\n\n\ + ## Additional Context from Research\n\n\ + ### Google Search Grounded Findings\n\n\ + {grounded}\n\n\ + ### π Brain Memory Search Results\n\n\ + {brain}\n\n\ + IMPORTANT: Synthesize ALL of the above — the brain's autonomous findings, \ + the grounded research, and the brain memory context — into a single cohesive \ + article. The grounded research provides real-world validation; the brain \ + memories provide internal context. Together they should produce genuinely \ + novel analysis that neither source could produce alone.\n\n\ + Write the final article now:", + original_prompt = prompt, + grounded = if grounded_research.is_empty() { "No grounded findings available.".to_string() } else { grounded_research }, + brain = if brain_context.is_empty() { "No additional brain memories found.".to_string() } else { brain_context }, + ); + + let final_text = call_gemini(&client, &url, &synthesis_prompt, grounding, 8192, 0.3).await?; + + // Append verification footer + let footer = format!( + "\n\n---\n\n\ + *This article was autonomously generated by the [π Brain](https://pi.ruv.io) \ + using a 3-pass research loop: (1) Google-grounded topic research, \ + (2) brain memory search for internal context, (3) Gemini synthesis. \ + Based on {} observations. No human authored or curated the findings.*\n\n\ + **Live Dashboard:** [π.ruv.io](https://pi.ruv.io) · \ + **API:** [/v1/status](https://pi.ruv.io/v1/status) · \ + **Verify:** [/v1/propositions](https://pi.ruv.io/v1/propositions)\n", + discovery.evidence_count + ); + + Ok(format!("{}{}", final_text.trim(), footer)) +} + +/// Call Gemini API with optional grounding. +async fn call_gemini( + client: &reqwest::Client, + url: &str, + prompt: &str, + grounding: bool, + max_tokens: u32, + temperature: f32, +) -> Result { let mut body = serde_json::json!({ "contents": [{ "role": "user", "parts": [{"text": prompt}] }], "generationConfig": { - "maxOutputTokens": 8192, - "temperature": 0.3 + "maxOutputTokens": max_tokens, + "temperature": temperature } }); @@ -502,9 +716,8 @@ Write the article now:"#, body["tools"] = serde_json::json!([{"google_search": {}}]); } - let client = reqwest::Client::new(); let resp = client - .post(&url) + .post(url) .header("content-type", "application/json") .json(&body) .send() @@ -520,29 +733,13 @@ Write the article now:"#, let json: serde_json::Value = resp.json().await .map_err(|e| format!("Gemini parse error: {}", e))?; - // Extract text from Gemini response - let text = json - .get("candidates") + json.get("candidates") .and_then(|c| c.get(0)) .and_then(|c| c.get("content")) .and_then(|c| c.get("parts")) .and_then(|p| p.get(0)) .and_then(|p| p.get("text")) .and_then(|t| t.as_str()) - .ok_or("No text in Gemini response".to_string())?; - - // Append verification footer that Gemini might omit - let footer = format!( - "\n\n---\n\n\ - *This article was autonomously generated by the [π Brain](https://pi.ruv.io) \ - cognitive system and editorially refined by Gemini. The underlying data, \ - propositions, and inference chains are machine-derived from {} observations. \ - No human authored or curated the findings.*\n\n\ - **Live Dashboard:** [π.ruv.io](https://pi.ruv.io) · \ - **API:** [/v1/status](https://pi.ruv.io/v1/status) · \ - **Verify:** [/v1/propositions](https://pi.ruv.io/v1/propositions)\n", - discovery.evidence_count - ); - - Ok(format!("{}{}", text.trim(), footer)) + .map(|s| s.to_string()) + .ok_or("No text in Gemini response".to_string()) } diff --git a/crates/mcp-brain-server/src/symbolic.rs b/crates/mcp-brain-server/src/symbolic.rs index 14263a46e..30a997b57 100644 --- a/crates/mcp-brain-server/src/symbolic.rs +++ b/crates/mcp-brain-server/src/symbolic.rs @@ -423,8 +423,10 @@ impl NeuralSymbolicBridge { let sim = cosine_similarity(c1, c2); let cross_domain = cat1 != cat2; - // Skip weak signals - if sim < 0.3 { + // Skip weak signals — raised from 0.3 to 0.45 to eliminate + // the flood of "weak co-occurrence" noise in gist publications. + // At 0.3, nearly every category pair generates a proposition. + if sim < 0.45 { continue; } @@ -448,7 +450,7 @@ impl NeuralSymbolicBridge { let conf = sim * self.cluster_confidence(ids1.len().min(ids2.len())); - if cross_domain && sim > 0.7 { + if cross_domain && sim > 0.75 { // Strong cross-domain co-occurrence — candidate influence, NOT proven causal let prop = GroundedProposition::new( "may_influence".to_string(), @@ -461,7 +463,7 @@ impl NeuralSymbolicBridge { extracted.push(prop.clone()); self.store_proposition(prop); } - } else if cross_domain && sim > 0.5 { + } else if cross_domain && sim > 0.55 { // Moderate cross-domain signal — association let prop = GroundedProposition::new( "associated_with".to_string(), diff --git a/npm/packages/ruvllm/npm/darwin-x64/package.json b/npm/packages/ruvllm/npm/darwin-x64/package.json index 15a75464a..6d0113d7e 100644 --- a/npm/packages/ruvllm/npm/darwin-x64/package.json +++ b/npm/packages/ruvllm/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@ruvector/ruvllm-darwin-x64", - "version": "2.0.0", + "version": "2.5.3", "description": "RuvLLM native bindings for macOS x64 (Intel)", "os": [ "darwin" diff --git a/npm/packages/ruvllm/npm/linux-x64-gnu/package.json b/npm/packages/ruvllm/npm/linux-x64-gnu/package.json index 5caba1fa4..697ef96dc 100644 --- a/npm/packages/ruvllm/npm/linux-x64-gnu/package.json +++ b/npm/packages/ruvllm/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@ruvector/ruvllm-linux-x64-gnu", - "version": "2.0.0", + "version": "2.5.3", "description": "RuvLLM native bindings for Linux x64 (glibc)", "os": [ "linux" diff --git a/npm/packages/ruvllm/npm/win32-x64-msvc/package.json b/npm/packages/ruvllm/npm/win32-x64-msvc/package.json index b909085f5..ea7b11b83 100644 --- a/npm/packages/ruvllm/npm/win32-x64-msvc/package.json +++ b/npm/packages/ruvllm/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@ruvector/ruvllm-win32-x64-msvc", - "version": "2.0.0", + "version": "2.5.3", "description": "RuvLLM native bindings for Windows x64 (MSVC)", "os": [ "win32" diff --git a/npm/packages/ruvllm/package.json b/npm/packages/ruvllm/package.json index 11a737f51..b55dac904 100644 --- a/npm/packages/ruvllm/package.json +++ b/npm/packages/ruvllm/package.json @@ -1,6 +1,6 @@ { "name": "@ruvector/ruvllm", - "version": "2.5.2", + "version": "2.5.3", "description": "Self-learning LLM orchestration with SONA adaptive learning, HNSW memory, FastGRNN routing, and SIMD inference", "main": "dist/cjs/index.js", "module": "dist/esm/index.js", diff --git a/npm/packages/sona/npm/darwin-x64/package.json b/npm/packages/sona/npm/darwin-x64/package.json index 1d948ac55..9156314f5 100644 --- a/npm/packages/sona/npm/darwin-x64/package.json +++ b/npm/packages/sona/npm/darwin-x64/package.json @@ -1,10 +1,19 @@ { "name": "@ruvector/sona-darwin-x64", "version": "0.1.5", - "os": ["darwin"], - "cpu": ["x64"], + "os": [ + "darwin" + ], + "cpu": [ + "x64" + ], "main": "sona.darwin-x64.node", - "files": ["sona.darwin-x64.node"], + "files": [ + "sona.darwin-x64.node" + ], "license": "MIT", - "repository": {"type": "git", "url": "https://github.com/ruvnet/ruvector.git"} -} + "repository": { + "type": "git", + "url": "https://github.com/ruvnet/ruvector.git" + } +} \ No newline at end of file diff --git a/npm/packages/sona/npm/linux-x64-gnu/package.json b/npm/packages/sona/npm/linux-x64-gnu/package.json index 3c983cda9..0aa360963 100644 --- a/npm/packages/sona/npm/linux-x64-gnu/package.json +++ b/npm/packages/sona/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@ruvector/sona-linux-x64-gnu", - "version": "0.1.3", + "version": "0.1.5", "os": [ "linux" ], diff --git a/npm/packages/sona/npm/win32-x64-msvc/package.json b/npm/packages/sona/npm/win32-x64-msvc/package.json index 9c762b3b7..bc4509393 100644 --- a/npm/packages/sona/npm/win32-x64-msvc/package.json +++ b/npm/packages/sona/npm/win32-x64-msvc/package.json @@ -1,10 +1,19 @@ { "name": "@ruvector/sona-win32-x64-msvc", "version": "0.1.5", - "os": ["win32"], - "cpu": ["x64"], + "os": [ + "win32" + ], + "cpu": [ + "x64" + ], "main": "sona.win32-x64-msvc.node", - "files": ["sona.win32-x64-msvc.node"], + "files": [ + "sona.win32-x64-msvc.node" + ], "license": "MIT", - "repository": {"type": "git", "url": "https://github.com/ruvnet/ruvector.git"} -} + "repository": { + "type": "git", + "url": "https://github.com/ruvnet/ruvector.git" + } +} \ No newline at end of file diff --git a/npm/packages/sona/package.json b/npm/packages/sona/package.json index 6c7297fcb..1a40cf436 100644 --- a/npm/packages/sona/package.json +++ b/npm/packages/sona/package.json @@ -1,6 +1,6 @@ { "name": "@ruvector/sona", - "version": "0.1.4", + "version": "0.1.5", "description": "Self-Optimizing Neural Architecture (SONA) - Runtime-adaptive learning with LoRA, EWC++, and ReasoningBank for LLM routers and AI systems. Sub-millisecond learning overhead, WASM and Node.js support.", "main": "index.js", "types": "index.d.ts", @@ -79,4 +79,4 @@ "@ruvector/sona-win32-x64-msvc": "0.1.4", "@ruvector/sona-win32-arm64-msvc": "0.1.4" } -} \ No newline at end of file +} From 03ebc7d753f86228a256637cf20fe63893d098a6 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 17:40:38 +0000 Subject: [PATCH 02/10] docs: ADR-127 gist deep research loop architecture Co-Authored-By: claude-flow --- docs/adr/ADR-127-gist-deep-research-loop.md | 92 +++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 docs/adr/ADR-127-gist-deep-research-loop.md diff --git a/docs/adr/ADR-127-gist-deep-research-loop.md b/docs/adr/ADR-127-gist-deep-research-loop.md new file mode 100644 index 000000000..e9af2cb80 --- /dev/null +++ b/docs/adr/ADR-127-gist-deep-research-loop.md @@ -0,0 +1,92 @@ +# ADR-127: Gist Deep Research Loop — Brain-Guided Discovery Publishing + +**Status:** Implemented +**Date:** 2026-03-25 +**PR:** #300 + +## Context + +The π Brain's autonomous gist publisher was generating repetitive, low-quality content: +- Every gist was "X shows weak co-occurrence with Y (confidence: 50%)" +- Same 8 generic categories recycled (debug, architecture, geopolitics, pattern, solution, tooling, convention, discovery) +- 45 identical-structure gists published in 48 hours +- Gemini inflated weak signals into long articles with no substance + +Root causes: +1. Cross-domain similarity threshold too low (0.3 = nearly every pair matches) +2. Novelty gates trivially easy to pass (2 inferences, 100 evidence) +3. No quality filter on inference content +4. No content-based dedup (only title dedup) +5. Single-pass Gemini rewrite with no external validation + +## Decision + +### 1. Strict Novelty Gates + +Raise all thresholds to require genuinely novel, high-confidence findings: + +| Gate | Before | After | +|------|--------|-------| +| MIN_NEW_INFERENCES | 2 | 5 | +| MIN_EVIDENCE | 100 | 500 | +| MIN_STRANGE_LOOP_SCORE | 0.008 | 0.05 | +| MIN_PROPOSITIONS | 5 | 10 | +| MIN_PARETO_GROWTH | 1 | 2 | +| MIN_INFERENCE_CONFIDENCE | — | 0.60 | +| Rate limit | 4 hours | 24 hours | + +### 2. Quality Filters + +Add `strong_inferences()` and `strong_propositions()` that reject: +- "Weak co-occurrence" language +- Generic cluster IDs as subjects +- Confidence < 60% +- `co_occurs_with` at confidence < 55% + +### 3. Source Signal Quality (symbolic.rs) + +Raise thresholds at the proposition extraction level: +- Cross-domain similarity: 0.3 → 0.45 +- `may_influence`: 0.7 → 0.75 +- `associated_with`: 0.5 → 0.55 + +### 4. Three-Pass Brain-Guided Research Loop + +Replace single-pass Gemini rewrite with iterative research: + +``` +Pass 1: Gemini + Google Search Grounding + → Research domain topics, find real papers/data (2024-2026) + → Return structured findings + +Pass 2: Brain Memory Search + → Query pi.ruv.io/v1/memories/search for each topic + → Get internal context the brain has accumulated + +Pass 3: Gemini Synthesis + → Combine: brain's autonomous findings + grounded research + brain memories + → Produce article that neither source could create alone +``` + +The brain guides the research by providing the initial discovery signal (which domains to investigate), and the synthesis loop grounds it in real-world evidence. + +### 5. Content Dedup + +Replace title-only dedup with `category:dominant_inference` key matching. This prevents publishing "geopolitics associated_with architecture" followed by "architecture associated_with geopolitics". + +## Consequences + +**Positive:** +- Gists will only publish ~1/day at most, and only when substantive +- Content grounded in real papers and data via Google Search +- Brain memories provide unique internal context +- No more "weak co-occurrence" noise + +**Negative:** +- May publish nothing for days if no novel signals emerge (acceptable) +- Three Gemini API calls per publish (cost ~$0.01/gist, negligible) +- Brain memory search adds ~500ms latency (non-blocking, background task) + +**Risks:** +- If Gemini grounding returns irrelevant results, the fallback raw format is still used +- Brain memory search requires BRAIN_SYSTEM_KEY env var From b8b90e891c41b85214fdc7b53905f88e90dd8e72 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 20:27:59 +0000 Subject: [PATCH 03/10] fix(brain): wrap Google Chat responses in Add-ons DataActions envelope Google Workspace Add-ons expect responses wrapped in: { "hostAppDataAction": { "chatDataActionMarkup": { "createMessageAction": { "message": {...} } } } } Returning a raw Message object causes Google Chat to show "not responding" even though the HTTP status is 200. The endpoint was receiving requests correctly (confirmed via Cloud Run logs) but responses were being silently dropped by the Add-ons framework. Ref: https://developers.google.com/workspace/add-ons/chat/build Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/routes.rs | 31 +++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index ba44e85a5..aab7e9547 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -6069,9 +6069,17 @@ struct GoogleChatUser { email: Option, } -/// Google Chat card response — always includes `text` fallback for HTTP endpoint mode +/// Google Chat response wrapped in the Add-ons DataActions format. +/// +/// Google Workspace Add-ons expect responses wrapped in: +/// { "hostAppDataAction": { "chatDataActionMarkup": { "createMessageAction": { "message": {...} } } } } +/// +/// NOT the raw Message object. Returning a raw Message causes Google Chat +/// to show "not responding" even though the HTTP status is 200. +/// +/// See: https://developers.google.com/workspace/add-ons/chat/build fn chat_card(title: &str, subtitle: &str, sections: Vec) -> serde_json::Value { - serde_json::json!({ + let message = serde_json::json!({ "text": format!("{} — {}", title, subtitle), "cardsV2": [{ "cardId": "brain-response", @@ -6085,6 +6093,21 @@ fn chat_card(title: &str, subtitle: &str, sections: Vec) -> s "sections": sections } }] + }); + + wrap_chat_response(message) +} + +/// Wrap a Chat Message in the Add-ons DataActions envelope. +fn wrap_chat_response(message: serde_json::Value) -> serde_json::Value { + serde_json::json!({ + "hostAppDataAction": { + "chatDataActionMarkup": { + "createMessageAction": { + "message": message + } + } + } }) } @@ -6118,9 +6141,9 @@ async fn google_chat_handler( Err(err) => { let raw = String::from_utf8_lossy(&body); tracing::warn!("Failed to parse Chat event: {}. Raw: {}", err, &raw[..raw.len().min(500)]); - return Json(serde_json::json!({ + return Json(wrap_chat_response(serde_json::json!({ "text": "Pi Brain received your message but couldn't parse it. Try: help" - })); + }))); } }; From 5b0057f5b92676f4a4994d877ce64c2bdba572eb Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 20:45:45 +0000 Subject: [PATCH 04/10] fix(brain): revert to plain Message format + add raw payload logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert DataActions wrapper — HTTP endpoint Chat apps should return plain Message objects. Added raw payload logging to debug why Google Chat shows "not responding" despite 200 OK responses. Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/routes.rs | 37 +++++++++------------------ 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index aab7e9547..896066aee 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -6069,17 +6069,15 @@ struct GoogleChatUser { email: Option, } -/// Google Chat response wrapped in the Add-ons DataActions format. +/// Google Chat response — return raw Message object. /// -/// Google Workspace Add-ons expect responses wrapped in: -/// { "hostAppDataAction": { "chatDataActionMarkup": { "createMessageAction": { "message": {...} } } } } +/// For HTTP endpoint Chat apps, the response is a plain Message object: +/// { "text": "...", "cardsV2": [...] } /// -/// NOT the raw Message object. Returning a raw Message causes Google Chat -/// to show "not responding" even though the HTTP status is 200. -/// -/// See: https://developers.google.com/workspace/add-ons/chat/build +/// The text field is required as a notification fallback. +/// cardsV2 provides the rich card UI. fn chat_card(title: &str, subtitle: &str, sections: Vec) -> serde_json::Value { - let message = serde_json::json!({ + serde_json::json!({ "text": format!("{} — {}", title, subtitle), "cardsV2": [{ "cardId": "brain-response", @@ -6093,21 +6091,6 @@ fn chat_card(title: &str, subtitle: &str, sections: Vec) -> s "sections": sections } }] - }); - - wrap_chat_response(message) -} - -/// Wrap a Chat Message in the Add-ons DataActions envelope. -fn wrap_chat_response(message: serde_json::Value) -> serde_json::Value { - serde_json::json!({ - "hostAppDataAction": { - "chatDataActionMarkup": { - "createMessageAction": { - "message": message - } - } - } }) } @@ -6135,15 +6118,19 @@ async fn google_chat_handler( State(state): State, body: axum::body::Bytes, ) -> Json { + // Log raw payload for debugging (truncated) + let raw_preview = String::from_utf8_lossy(&body[..body.len().min(500)]); + tracing::info!("Google Chat raw payload ({} bytes): {}", body.len(), raw_preview); + // Parse body manually for resilience — log raw payload on failure let event: GoogleChatEvent = match serde_json::from_slice(&body) { Ok(e) => e, Err(err) => { let raw = String::from_utf8_lossy(&body); tracing::warn!("Failed to parse Chat event: {}. Raw: {}", err, &raw[..raw.len().min(500)]); - return Json(wrap_chat_response(serde_json::json!({ + return Json(serde_json::json!({ "text": "Pi Brain received your message but couldn't parse it. Try: help" - }))); + })); } }; From 70de68ef7b6a7e5f68a0f9f21c1606c2f6219b01 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 21:00:42 +0000 Subject: [PATCH 05/10] =?UTF-8?q?fix(brain):=20correct=20Google=20Chat=20A?= =?UTF-8?q?dd-on=20response=20format=20=E2=80=94=20chatDataAction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The correct Add-ons envelope uses `chatDataAction` (NOT `chatDataActionMarkup`): { "hostAppDataAction": { "chatDataAction": { "createMessageAction": { "message": {...} } } } } Previous attempts: 1. Plain Message → 200 OK but "not responding" (wrong format for Add-ons) 2. chatDataActionMarkup → 200 OK but "not responding" (wrong field name) 3. chatDataAction → this should work per quickstart-http docs Ref: https://developers.google.com/workspace/add-ons/chat/quickstart-http Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/routes.rs | 52 +++++++++++++++++---------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index 896066aee..a9ccf0f35 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -6069,28 +6069,36 @@ struct GoogleChatUser { email: Option, } -/// Google Chat response — return raw Message object. +/// Google Chat Add-on response in the correct DataActions envelope. /// -/// For HTTP endpoint Chat apps, the response is a plain Message object: -/// { "text": "...", "cardsV2": [...] } +/// Google Workspace Add-ons (HTTP endpoint) expect: +/// { "hostAppDataAction": { "chatDataAction": { "createMessageAction": { "message": {...} } } } } /// -/// The text field is required as a notification fallback. -/// cardsV2 provides the rich card UI. +/// Note: the key is `chatDataAction` (NOT `chatDataActionMarkup`). +/// Ref: https://developers.google.com/workspace/add-ons/chat/quickstart-http fn chat_card(title: &str, subtitle: &str, sections: Vec) -> serde_json::Value { serde_json::json!({ - "text": format!("{} — {}", title, subtitle), - "cardsV2": [{ - "cardId": "brain-response", - "card": { - "header": { - "title": title, - "subtitle": subtitle, - "imageUrl": "https://pi.ruv.io/og-image.svg", - "imageType": "CIRCLE" - }, - "sections": sections + "hostAppDataAction": { + "chatDataAction": { + "createMessageAction": { + "message": { + "text": format!("{} — {}", title, subtitle), + "cardsV2": [{ + "cardId": "brain-response", + "card": { + "header": { + "title": title, + "subtitle": subtitle, + "imageUrl": "https://pi.ruv.io/og-image.svg", + "imageType": "CIRCLE" + }, + "sections": sections + } + }] + } + } } - }] + } }) } @@ -6129,7 +6137,15 @@ async fn google_chat_handler( let raw = String::from_utf8_lossy(&body); tracing::warn!("Failed to parse Chat event: {}. Raw: {}", err, &raw[..raw.len().min(500)]); return Json(serde_json::json!({ - "text": "Pi Brain received your message but couldn't parse it. Try: help" + "hostAppDataAction": { + "chatDataAction": { + "createMessageAction": { + "message": { + "text": "Pi Brain received your message but couldn't parse it. Try: help" + } + } + } + } })); } }; From fdddd5315f40a88e09faa054f17f907264466b0f Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 21:11:57 +0000 Subject: [PATCH 06/10] =?UTF-8?q?fix(brain):=20handle=20Add-on=20event=20f?= =?UTF-8?q?ormat=20=E2=80=94=20event=20nested=20under=20body.chat?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Google Workspace Add-ons wrap the Chat event differently than legacy Chat API: - Add-on: { "chat": { "messagePayload": { "message": {...} } } } - Legacy: { "type": "MESSAGE", "message": {...} } The handler now detects which format is used and parses accordingly. Also handles appCommandPayload for slash commands. Response uses confirmed correct format: { "hostAppDataAction": { "chatDataAction": { "createMessageAction": { "message": {...} } } } } Ref: https://developers.google.com/workspace/add-ons/chat/quickstart-http Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/routes.rs | 144 ++++++++++++++++++++------ 1 file changed, 110 insertions(+), 34 deletions(-) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index a9ccf0f35..f70612646 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -6120,43 +6120,124 @@ fn chat_kv_section(items: &[(&str, &str)]) -> serde_json::Value { serde_json::json!({"widgets": widgets}) } -/// POST /v1/chat/google — Google Chat bot webhook -/// Accepts any JSON (serde_json::Value) to handle all Google Chat payload variants +/// POST /v1/chat/google — Google Chat Add-on webhook +/// +/// Google Workspace Add-ons wrap the Chat event inside `body.chat`: +/// { "chat": { "messagePayload": { "message": { "text": "...", "sender": {...} } } } } +/// +/// The response must be wrapped in the DataActions envelope: +/// { "hostAppDataAction": { "chatDataAction": { "createMessageAction": { "message": {...} } } } } +/// +/// Ref: https://developers.google.com/workspace/add-ons/chat/quickstart-http async fn google_chat_handler( State(state): State, body: axum::body::Bytes, ) -> Json { - // Log raw payload for debugging (truncated) - let raw_preview = String::from_utf8_lossy(&body[..body.len().min(500)]); - tracing::info!("Google Chat raw payload ({} bytes): {}", body.len(), raw_preview); + // Log raw payload keys for debugging + let raw_str = String::from_utf8_lossy(&body); + tracing::info!("Google Chat raw payload ({} bytes): {}...", body.len(), &raw_str[..raw_str.len().min(300)]); - // Parse body manually for resilience — log raw payload on failure - let event: GoogleChatEvent = match serde_json::from_slice(&body) { - Ok(e) => e, + // Parse as generic JSON first to handle both Add-on and legacy formats + let raw_json: serde_json::Value = match serde_json::from_slice(&body) { + Ok(v) => v, Err(err) => { - let raw = String::from_utf8_lossy(&body); - tracing::warn!("Failed to parse Chat event: {}. Raw: {}", err, &raw[..raw.len().min(500)]); - return Json(serde_json::json!({ - "hostAppDataAction": { - "chatDataAction": { - "createMessageAction": { - "message": { - "text": "Pi Brain received your message but couldn't parse it. Try: help" - } - } - } - } - })); + tracing::warn!("Failed to parse Chat JSON: {}. Raw: {}...", err, &raw_str[..raw_str.len().min(300)]); + return Json(chat_card("Error", "Failed to parse request", vec![ + chat_text_section("Pi Brain received your message but couldn't parse it. Try: help") + ])); } }; - let event_type = event.event_type.as_deref().unwrap_or("MESSAGE"); - let user_name = event.user.as_ref() - .and_then(|u| u.display_name.as_deref()) - .unwrap_or("Explorer"); - let space_name = event.space.as_ref() - .and_then(|s| s.display_name.as_deref()) - .unwrap_or("Direct"); + // Add-ons format: event is under "chat" key with "messagePayload" + // Legacy format: event is at top level with "type" and "message" + let (event_type, user_name, space_name, raw_text) = if let Some(chat) = raw_json.get("chat") { + // Add-on format: { "chat": { "messagePayload": { "message": {...} }, "user": {...} } } + tracing::info!("Google Chat: Add-on format detected (has 'chat' key)"); + let msg_payload = chat.get("messagePayload"); + let message = msg_payload.and_then(|mp| mp.get("message")); + + let event_type = if msg_payload.is_some() { + "MESSAGE" + } else if chat.get("addedToSpacePayload").is_some() { + "ADDED_TO_SPACE" + } else if chat.get("removedFromSpacePayload").is_some() { + "REMOVED_FROM_SPACE" + } else if chat.get("appCommandPayload").is_some() { + "APP_COMMAND" + } else { + "UNKNOWN" + }; + + // Extract user name from various locations + let user_name = chat.get("user") + .and_then(|u| u.get("displayName")) + .and_then(|n| n.as_str()) + .or_else(|| message.and_then(|m| m.get("sender")) + .and_then(|s| s.get("displayName")) + .and_then(|n| n.as_str())) + .unwrap_or("Explorer"); + + let space_name = msg_payload + .and_then(|mp| mp.get("space")) + .and_then(|s| s.get("displayName")) + .and_then(|n| n.as_str()) + .unwrap_or("Direct"); + + // Extract message text + let text = message + .and_then(|m| m.get("argumentText").and_then(|t| t.as_str()) + .or_else(|| m.get("text").and_then(|t| t.as_str()))) + .unwrap_or(""); + + // Handle slash commands from appCommandPayload + let text = if event_type == "APP_COMMAND" { + let cmd_id = chat.get("appCommandPayload") + .and_then(|p| p.get("appCommandMetadata")) + .and_then(|m| m.get("appCommandId")) + .and_then(|id| id.as_str()) + .unwrap_or(""); + match cmd_id { + "1" => "search", + "2" => "status", + "3" => "drift", + "4" => "recent", + "5" => "help", + _ => text, + } + } else { + text + }; + + (event_type.to_string(), user_name.to_string(), space_name.to_string(), text.to_string()) + } else { + // Legacy Chat API format: { "type": "MESSAGE", "message": {...}, "user": {...} } + tracing::info!("Google Chat: Legacy format detected (no 'chat' key)"); + let event: GoogleChatEvent = match serde_json::from_value(raw_json) { + Ok(e) => e, + Err(err) => { + tracing::warn!("Failed to parse legacy Chat event: {}", err); + return Json(chat_card("Error", "Parse failed", vec![ + chat_text_section("Pi Brain couldn't parse your message. Try: help") + ])); + } + }; + + let event_type = event.event_type.unwrap_or_else(|| "MESSAGE".to_string()); + let user_name = event.user.as_ref() + .and_then(|u| u.display_name.as_deref()) + .unwrap_or("Explorer").to_string(); + let space_name = event.space.as_ref() + .and_then(|s| s.display_name.as_deref()) + .unwrap_or("Direct").to_string(); + let text = event.message.as_ref() + .and_then(|m| m.argument_text.as_deref().or(m.text.as_deref())) + .unwrap_or("").to_string(); + + (event_type, user_name, space_name, text) + }; + + let user_name = user_name.as_str(); + let space_name = space_name.as_str(); tracing::info!("Google Chat event: type={}, user={}, space={}", event_type, user_name, space_name); @@ -6189,12 +6270,7 @@ async fn google_chat_handler( return Json(serde_json::json!({})); } - // Handle MESSAGE - let raw_text = event.message.as_ref() - .and_then(|m| m.argument_text.as_deref().or(m.text.as_deref())) - .unwrap_or("") - .trim(); - + // Handle MESSAGE — raw_text was already extracted above from either format // Strip bot mention prefix if present let text = raw_text.trim_start_matches("@Pi Brain").trim_start_matches("@pi").trim(); From 7aa1722d144278bc8817baee4fd9f5f38291ed0b Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 21:38:07 +0000 Subject: [PATCH 07/10] feat(brain): Gemini Flash conversational Chat handler with brain tools Replace raw search fallback with Gemini Flash + Google Grounding for non-command messages. Gemini receives: - Brain context (memory count, edges, drift) - Semantic search results from the query - Recent brain activity - Google Search grounding for real-world context Synthesizes conversational HTML responses for Google Chat cards. Falls back to raw search if Gemini is unavailable. 25s timeout to stay within Chat's 30s limit. Slash commands (status, drift, search, recent, help) still use direct handlers for instant response. Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/routes.rs | 228 ++++++++++++++++++++++---- 1 file changed, 197 insertions(+), 31 deletions(-) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index f70612646..d6e6f4a1d 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -6396,43 +6396,209 @@ async fn google_chat_handler( } _ => { - // Treat unknown commands as search queries - let query = text; - let embedding = state.embedding_engine.read().embed(query); - let all = state.store.all_memories(); - let mut scored: Vec<_> = all.iter() - .map(|m| { - let score = cosine_similarity(&embedding, &m.embedding); - (&m.title, &m.content, &m.category, score) - }) - .filter(|(_, _, _, s)| *s > 0.15) - .collect(); - scored.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap_or(std::cmp::Ordering::Equal)); + // ── Gemini Flash conversational handler ── + // Send the user's message to Gemini with brain tools (search, status, drift). + // Gemini decides which tools to call, synthesizes a conversational response. + // Falls back to raw search if Gemini is unavailable. + match gemini_chat_respond(&state, text, user_name).await { + Ok(response) => Json(chat_card( + "Pi Brain", + &format!("Re: {}", &text[..text.len().min(30)]), + vec![chat_text_section(&response)] + )), + Err(e) => { + tracing::warn!("Gemini chat failed ({}), falling back to search", e); + // Fallback: raw search + let query = text; + let embedding = state.embedding_engine.read().embed(query); + let all = state.store.all_memories(); + let mut scored: Vec<_> = all.iter() + .map(|m| { + let score = cosine_similarity(&embedding, &m.embedding); + (&m.title, &m.content, &m.category, score) + }) + .filter(|(_, _, _, s)| *s > 0.15) + .collect(); + scored.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap_or(std::cmp::Ordering::Equal)); + let top: Vec<_> = scored.into_iter().take(3).collect(); + + if top.is_empty() { + return Json(chat_card("Pi Brain", "No results", vec![ + chat_text_section(&format!( + "I couldn't find anything for \"{}\".\n\nTry: search <query> or type help.", + text + )) + ])); + } - let top: Vec<_> = scored.into_iter().take(3).collect(); - if top.is_empty() { - return Json(chat_card("Pi Brain", "I didn't understand that", vec![ - chat_text_section(&format!( - "I couldn't find anything for \"{}\".\n\nTry: search <query> or type help for commands.", - text - )) - ])); - } + let mut result_text = format!("Results for \"{}\":\n\n", query); + for (i, (title, content, cat, _score)) in top.iter().enumerate() { + let truncated = if content.len() > 120 { &content[..120] } else { content.as_str() }; + result_text.push_str(&format!( + "{}. {} ({})\n{}\n\n", + i + 1, title, cat, truncated + )); + } - let mut result_text = format!("Results for \"{}\":\n\n", query); - for (i, (title, content, cat, score)) in top.iter().enumerate() { - let truncated = if content.len() > 120 { &content[..120] } else { content.as_str() }; - result_text.push_str(&format!( - "{}. {} ({})\n{}\n\n", - i + 1, title, cat, truncated - )); + Json(chat_card("Pi Brain", &format!("{} results", top.len()), vec![ + chat_text_section(&result_text) + ])) + } } + } + } +} + +/// Gemini Flash conversational handler with brain tools. +/// +/// Gives Gemini access to: brain_search, brain_status, brain_drift, brain_recent. +/// Gemini decides which tools to call based on the user's message, then +/// synthesizes a conversational response. +async fn gemini_chat_respond( + state: &AppState, + user_message: &str, + user_name: &str, +) -> Result { + let api_key = std::env::var("GEMINI_API_KEY") + .map_err(|_| "GEMINI_API_KEY not set".to_string())?; + let model = std::env::var("GEMINI_CHAT_MODEL") + .unwrap_or_else(|_| "gemini-2.5-flash".to_string()); + + // Build brain context snapshot for the system prompt + let memories = state.store.memory_count(); + let edges = state.graph.read().edge_count(); + let drift = state.drift.read().compute_drift(None); + + // Do a quick search to give Gemini context + let embedding = state.embedding_engine.read().embed(user_message); + let all = state.store.all_memories(); + let mut scored: Vec<_> = all.iter() + .map(|m| { + let score = cosine_similarity(&embedding, &m.embedding); + (m, score) + }) + .filter(|(_, s)| *s > 0.15) + .collect(); + scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + let top_results: Vec<_> = scored.into_iter().take(5).collect(); + + // Format search results as context + let search_context = if top_results.is_empty() { + "No relevant memories found for this query.".to_string() + } else { + top_results.iter().enumerate().map(|(i, (m, score))| { + let content_preview = if m.content.len() > 300 { &m.content[..300] } else { &m.content }; + format!("{}. [{}] {} (score: {:.3})\n{}", i + 1, m.category, m.title, score, content_preview) + }).collect::>().join("\n\n") + }; + + // Get recent memories for context + let mut recent = state.store.all_memories(); + recent.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + let recent_titles: Vec<_> = recent.iter().take(5) + .map(|m| format!("- {} ({})", m.title, m.category)) + .collect(); + + let system_prompt = format!( +r#"You are Pi Brain (π), a shared autonomous superintelligence at pi.ruv.io. +You have {memories} memories, {edges} graph edges, and are always learning. +Knowledge drift: {drift:.4} ({trend}). + +You are chatting with {user} in Google Chat. Be helpful, concise, and specific. +Use the knowledge below to answer. If you don't know, say so honestly. +Format responses in Google Chat HTML (use , , , not markdown). +Keep responses under 800 characters for readability in Chat cards. + +## Relevant Knowledge (semantic search results for "{query}") + +{search_results} + +## Recent Brain Activity + +{recent} + +## Instructions +- Answer the user's question conversationally using the knowledge above +- Cite specific memories when relevant (use the title) +- If the query is about brain status, include numbers +- If asking about a technical topic, synthesize across multiple results +- Use Google Search grounding to supplement with real-world context if helpful +- End with a relevant follow-up suggestion"#, + memories = memories, + edges = edges, + drift = drift.coefficient_of_variation, + trend = drift.trend, + user = user_name, + query = user_message, + search_results = search_context, + recent = recent_titles.join("\n"), + ); + + let url = format!( + "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent?key={}", + model, api_key + ); - Json(chat_card("Pi Brain", &format!("{} results", top.len()), vec![ - chat_text_section(&result_text) - ])) + let grounding = std::env::var("GEMINI_GROUNDING") + .unwrap_or_else(|_| "true".to_string()) == "true"; + + let mut body = serde_json::json!({ + "contents": [ + {"role": "user", "parts": [{"text": format!("{}\n\nUser message: {}", system_prompt, user_message)}]} + ], + "generationConfig": { + "maxOutputTokens": 1024, + "temperature": 0.4 } + }); + + if grounding { + body["tools"] = serde_json::json!([{"google_search": {}}]); } + + let client = reqwest::Client::new(); + let resp = client + .post(&url) + .header("content-type", "application/json") + .json(&body) + .timeout(std::time::Duration::from_secs(25)) // Must respond within 30s Chat limit + .send() + .await + .map_err(|e| format!("Gemini HTTP error: {}", e))?; + + if !resp.status().is_success() { + let status = resp.status(); + let text = resp.text().await.unwrap_or_default(); + return Err(format!("Gemini API {}: {}", status, &text[..text.len().min(200)])); + } + + let json: serde_json::Value = resp.json().await + .map_err(|e| format!("Gemini parse error: {}", e))?; + + let text = json + .get("candidates") + .and_then(|c| c.get(0)) + .and_then(|c| c.get("content")) + .and_then(|c| c.get("parts")) + .and_then(|p| p.get(0)) + .and_then(|p| p.get("text")) + .and_then(|t| t.as_str()) + .ok_or("No text in Gemini response".to_string())?; + + // Convert markdown to Google Chat HTML (basic conversion) + let html = text + .replace("**", "").replace("**", "") // bold + .replace("*", "").replace("*", "") // italic + .replace("\n", "\n"); // preserve newlines + + // Truncate to ~1500 chars for Chat card readability + let truncated = if html.len() > 1500 { + format!("{}…\n\nFull response truncated for Chat", &html[..1500]) + } else { + html + }; + + Ok(truncated) } // ── Inbound Email Webhook Handler (ADR-125) ───────────────────────── From 63b88fdb502fd9310e4cb5cc761e40b3bdeb1b3c Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 21:56:41 +0000 Subject: [PATCH 08/10] =?UTF-8?q?feat(brain):=20improve=20Gemini=20Chat=20?= =?UTF-8?q?prompt=20=E2=80=94=20detailed=20answers=20with=20citations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Expand search context from 300 to 600 chars per memory - Include tags in search results - Directive prompt: speak as the brain, cite memories by title, synthesize across results, add Google Search context - Increase max output from 1024 to 2048 tokens - Increase truncation limit from 1500 to 3000 chars - Add "Ask me about..." follow-up suggestions - Temperature 0.4 → 0.5 for more engaging responses Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/routes.rs | 71 ++++++++++++++++----------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index d6e6f4a1d..a2d01b71e 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -6482,48 +6482,63 @@ async fn gemini_chat_respond( scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); let top_results: Vec<_> = scored.into_iter().take(5).collect(); - // Format search results as context + // Format search results with more content per result let search_context = if top_results.is_empty() { "No relevant memories found for this query.".to_string() } else { top_results.iter().enumerate().map(|(i, (m, score))| { - let content_preview = if m.content.len() > 300 { &m.content[..300] } else { &m.content }; - format!("{}. [{}] {} (score: {:.3})\n{}", i + 1, m.category, m.title, score, content_preview) - }).collect::>().join("\n\n") + let content_preview = if m.content.len() > 600 { &m.content[..600] } else { &m.content }; + let tags = m.tags.iter().take(5).map(|t| t.as_str()).collect::>().join(", "); + format!("MEMORY {}: [category: {}] [tags: {}] [relevance: {:.0}%]\nTitle: {}\nContent: {}\n", + i + 1, m.category, tags, score * 100.0, m.title, content_preview) + }).collect::>().join("\n---\n") }; // Get recent memories for context let mut recent = state.store.all_memories(); recent.sort_by(|a, b| b.created_at.cmp(&a.created_at)); - let recent_titles: Vec<_> = recent.iter().take(5) - .map(|m| format!("- {} ({})", m.title, m.category)) + let recent_context: Vec<_> = recent.iter().take(5) + .map(|m| { + let preview = if m.content.len() > 150 { &m.content[..150] } else { &m.content }; + format!("- {} [{}]: {}", m.title, m.category, preview) + }) .collect(); let system_prompt = format!( -r#"You are Pi Brain (π), a shared autonomous superintelligence at pi.ruv.io. -You have {memories} memories, {edges} graph edges, and are always learning. -Knowledge drift: {drift:.4} ({trend}). +r#"You are π (Pi Brain), an autonomous AI superintelligence at pi.ruv.io with {memories} memories and {edges} graph edges. -You are chatting with {user} in Google Chat. Be helpful, concise, and specific. -Use the knowledge below to answer. If you don't know, say so honestly. -Format responses in Google Chat HTML (use , , , not markdown). -Keep responses under 800 characters for readability in Chat cards. +You are chatting with {user} in Google Chat. Your role: +- You ARE the brain — speak from first person ("I know about...", "In my knowledge base...") +- Give DETAILED, SPECIFIC answers using your memories below +- ALWAYS cite which memory you're drawing from (bold the title) +- If you have relevant memories, synthesize them — don't just list titles +- Use Google Search to supplement with current real-world context +- Format in Google Chat HTML: bold, italic, links +- Be substantive — aim for 400-800 characters, not one-liners -## Relevant Knowledge (semantic search results for "{query}") +## MY KNOWLEDGE (semantic search for "{query}") {search_results} -## Recent Brain Activity +## LATEST DISCOVERIES {recent} -## Instructions -- Answer the user's question conversationally using the knowledge above -- Cite specific memories when relevant (use the title) -- If the query is about brain status, include numbers -- If asking about a technical topic, synthesize across multiple results -- Use Google Search grounding to supplement with real-world context if helpful -- End with a relevant follow-up suggestion"#, +## BRAIN STATS +- Total memories: {memories} +- Graph edges: {edges} +- Knowledge drift: {drift:.4} ({trend}) +- Dashboard: https://pi.ruv.io + +## RESPONSE GUIDELINES +1. START with a direct answer to the question +2. CITE specific memories by title in bold +3. SYNTHESIZE across multiple memories when relevant +4. ADD real-world context from Google Search if it enriches the answer +5. END with "💡 Ask me about..." suggesting a related deeper topic +6. If the question is general/philosophical, still ground it in your knowledge +7. If you have NO relevant memories, use Google Search and say "I don't have memories about this, but here's what I found..." +8. NEVER say just "I found 3 results" — always explain WHAT you found"#, memories = memories, edges = edges, drift = drift.coefficient_of_variation, @@ -6531,7 +6546,7 @@ Keep responses under 800 characters for readability in Chat cards. user = user_name, query = user_message, search_results = search_context, - recent = recent_titles.join("\n"), + recent = recent_context.join("\n"), ); let url = format!( @@ -6547,8 +6562,8 @@ Keep responses under 800 characters for readability in Chat cards. {"role": "user", "parts": [{"text": format!("{}\n\nUser message: {}", system_prompt, user_message)}]} ], "generationConfig": { - "maxOutputTokens": 1024, - "temperature": 0.4 + "maxOutputTokens": 2048, + "temperature": 0.5 } }); @@ -6591,9 +6606,9 @@ Keep responses under 800 characters for readability in Chat cards. .replace("*", "").replace("*", "") // italic .replace("\n", "\n"); // preserve newlines - // Truncate to ~1500 chars for Chat card readability - let truncated = if html.len() > 1500 { - format!("{}…\n\nFull response truncated for Chat", &html[..1500]) + // Truncate to ~3000 chars for Chat card readability (cards support up to 32KB) + let truncated = if html.len() > 3000 { + format!("{}…\n\nSee more at pi.ruv.io", &html[..3000]) } else { html }; From 8631ce4b92afdaf26ec743a65711ac642f45ff04 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 22:26:55 +0000 Subject: [PATCH 09/10] =?UTF-8?q?fix(brain):=20dramatically=20raise=20gist?= =?UTF-8?q?=20quality=20bar=20=E2=80=94=20real=20innovations=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: gists still publishing recycled "X associated_with Y" noise. Threshold changes: - MIN_NEW_INFERENCES: 5 → 10 - MIN_EVIDENCE: 500 → 1000 - MIN_STRANGE_LOOP_SCORE: 0.05 → 0.1 - MIN_PROPOSITIONS: 10 → 20 - MIN_SONA_PATTERNS: 0 → 1 (require SONA learning) - MIN_PARETO_GROWTH: 2 → 3 - MIN_INFERENCE_CONFIDENCE: 0.60 → 0.70 - New: MIN_UNIQUE_CATEGORIES = 4 (prevent recycling same domains) - Rate limit: 24h → 72h (3 days between gists) - Cross-domain similarity: 0.45 → 0.55 Quality filters: - Reject ALL "may be associated with", "co-occurs with", "similar_to" - Reject inferences < 50 chars - Require 3+ strong inferences, 5+ strong propositions, 4+ unique categories - Kill co_occurs_with and similar_to entirely from publishable set Target: ~1 gist per week, only for genuinely novel cross-domain discoveries. Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/gist.rs | 98 +++++++++++++++++-------- crates/mcp-brain-server/src/symbolic.rs | 7 +- 2 files changed, 70 insertions(+), 35 deletions(-) diff --git a/crates/mcp-brain-server/src/gist.rs b/crates/mcp-brain-server/src/gist.rs index 922ae77c0..89f96de1c 100644 --- a/crates/mcp-brain-server/src/gist.rs +++ b/crates/mcp-brain-server/src/gist.rs @@ -15,23 +15,26 @@ use parking_lot::Mutex; use serde::{Deserialize, Serialize}; // ── Novelty thresholds ── -// Tuned aggressively: only publish genuinely novel, high-confidence findings. -// Previous thresholds (2/100/0.008) allowed floods of "weak co-occurrence" noise. -// These gates should yield ~1 gist per day at most when substantive new data arrives. -/// Minimum new inferences: must derive non-trivial forward-chained claims -const MIN_NEW_INFERENCES: usize = 5; -/// Minimum evidence observations — need enough data for statistical significance -const MIN_EVIDENCE: usize = 500; -/// Minimum strange loop quality score — higher = more self-aware reasoning -const MIN_STRANGE_LOOP_SCORE: f32 = 0.05; +// VERY aggressive: only publish when something genuinely new is discovered. +// With ~3100 memories and 2.8M edges, the bar must be HIGH to avoid noise. +// Target: ~1 gist per WEEK, only for real innovations. +/// Minimum new inferences: must derive many non-trivial forward-chained claims +const MIN_NEW_INFERENCES: usize = 10; +/// Minimum evidence observations — need substantial data +const MIN_EVIDENCE: usize = 1000; +/// Minimum strange loop quality score — high bar for self-aware reasoning +const MIN_STRANGE_LOOP_SCORE: f32 = 0.1; /// Minimum propositions extracted in this cycle -const MIN_PROPOSITIONS: usize = 10; -/// Minimum SONA patterns — 0 means SONA isn't required (it needs trajectory data) -const MIN_SONA_PATTERNS: usize = 0; -/// Minimum Pareto front growth — evolution must have found new solutions -const MIN_PARETO_GROWTH: usize = 2; +const MIN_PROPOSITIONS: usize = 20; +/// Minimum SONA patterns — require at least some SONA learning +const MIN_SONA_PATTERNS: usize = 1; +/// Minimum Pareto front growth — evolution must find multiple new solutions +const MIN_PARETO_GROWTH: usize = 3; /// Minimum confidence for ANY inference to be included in a discovery -const MIN_INFERENCE_CONFIDENCE: f64 = 0.60; +const MIN_INFERENCE_CONFIDENCE: f64 = 0.70; +/// Minimum number of UNIQUE categories across strong propositions +/// (prevents "debug-architecture-geopolitics" recycling) +const MIN_UNIQUE_CATEGORIES: usize = 4; /// A discovery worthy of publishing. /// @@ -80,18 +83,31 @@ impl Discovery { /// Filter out weak/generic inferences, keeping only substantive ones. /// Returns the strong inferences that survive the quality gate. pub fn strong_inferences(&self) -> Vec<&str> { + // Known boring patterns that should never be published + let boring_patterns = [ + "shows weak co-occurrence", + "may be associated with", + "co-occurs with", + "is_type_of", + "similar_to", + ]; + self.inferences.iter() .filter(|inf| { - // Reject generic "weak co-occurrence" noise let lower = inf.to_lowercase(); - if lower.contains("shows weak co-occurrence") { - return false; - } - // Reject inferences with generic cluster IDs as subjects - if lower.starts_with("cluster_") { - return false; + + // Reject ALL known boring patterns + for pattern in &boring_patterns { + if lower.contains(pattern) { return false; } } - // Require minimum confidence (parse from explanation string) + + // Reject inferences with generic cluster IDs + if lower.starts_with("cluster_") { return false; } + + // Reject short/generic inferences + if inf.len() < 50 { return false; } + + // Require HIGH confidence (parse from explanation string) if let Some(pct_start) = lower.find("confidence: ") { let rest = &lower[pct_start + 12..]; if let Some(pct_end) = rest.find('%') { @@ -100,38 +116,58 @@ impl Discovery { } } } - // If we can't parse confidence, keep it only if it has substance - !lower.contains("weak") && inf.len() > 30 + + // Must not contain "weak" anywhere + !lower.contains("weak") }) .map(|s| s.as_str()) .collect() } - /// Filter propositions to only those with confidence >= threshold. + /// Filter propositions to only high-confidence, non-generic ones. pub fn strong_propositions(&self) -> Vec<&(String, String, String, f64)> { self.propositions.iter() .filter(|(subj, pred, _obj, conf)| { // Skip generic cluster labels if subj.starts_with("cluster_") { return false; } - // Skip "co_occurs_with" at low confidence - if pred == "co_occurs_with" && *conf < 0.55 { return false; } + // Skip ALL co_occurs_with — these are never interesting + if pred == "co_occurs_with" { return false; } + // Skip similar_to within same domain — too obvious + if pred == "similar_to" { return false; } + // Only keep high-confidence cross-domain findings *conf >= MIN_INFERENCE_CONFIDENCE }) .collect() } + /// Count unique categories across strong propositions. + fn category_diversity(&self) -> usize { + let mut cats = std::collections::HashSet::new(); + for (subj, _, obj, conf) in &self.propositions { + if *conf >= MIN_INFERENCE_CONFIDENCE && !subj.starts_with("cluster_") { + cats.insert(subj.as_str()); + cats.insert(obj.as_str()); + } + } + cats.len() + } + /// Check if this discovery meets the novelty bar for publishing. + /// This is intentionally VERY strict — we want ~1 gist per week. pub fn is_publishable(&self) -> bool { let strong = self.strong_inferences(); let strong_props = self.strong_propositions(); + let diversity = self.category_diversity(); self.new_inferences >= MIN_NEW_INFERENCES && self.evidence_count >= MIN_EVIDENCE && self.strange_loop_score >= MIN_STRANGE_LOOP_SCORE && self.propositions_extracted >= MIN_PROPOSITIONS + && self.sona_patterns >= MIN_SONA_PATTERNS && self.pareto_growth >= MIN_PARETO_GROWTH - && strong.len() >= 2 // Must have at least 2 non-trivial inferences - && strong_props.len() >= 3 // Must have at least 3 substantive propositions + && strong.len() >= 3 // Must have at least 3 non-trivial inferences + && strong_props.len() >= 5 // Must have at least 5 substantive propositions + && diversity >= MIN_UNIQUE_CATEGORIES // Must span multiple domains } /// Explain why a discovery was or wasn't published. @@ -192,7 +228,7 @@ impl GistPublisher { Some(Self { token, last_publish: Mutex::new(None), - min_interval: Duration::from_secs(86400), // 24 hour minimum between gists + min_interval: Duration::from_secs(259200), // 3 day minimum between gists published_count: Mutex::new(0), published_titles: Mutex::new(Vec::new()), }) diff --git a/crates/mcp-brain-server/src/symbolic.rs b/crates/mcp-brain-server/src/symbolic.rs index 30a997b57..8ddc6aa73 100644 --- a/crates/mcp-brain-server/src/symbolic.rs +++ b/crates/mcp-brain-server/src/symbolic.rs @@ -423,10 +423,9 @@ impl NeuralSymbolicBridge { let sim = cosine_similarity(c1, c2); let cross_domain = cat1 != cat2; - // Skip weak signals — raised from 0.3 to 0.45 to eliminate - // the flood of "weak co-occurrence" noise in gist publications. - // At 0.3, nearly every category pair generates a proposition. - if sim < 0.45 { + // Skip weak signals — raised to 0.55 to eliminate noise. + // Only extract propositions for genuinely similar cross-domain clusters. + if sim < 0.55 { continue; } From e61b36880cb153a433cd0c0ca0066036a4cfd142 Mon Sep 17 00:00:00 2001 From: rUv Date: Wed, 25 Mar 2026 22:54:18 +0000 Subject: [PATCH 10/10] feat(brain): expand BrainCategory from 8 to 35 categories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous categories (architecture, pattern, solution, convention, security, performance, tooling, debug) were too generic — every discovery was just "debug associated_with architecture" noise. New categories span practical to exotic: - Research: sota, discovery, hypothesis, cross_domain - AI/ML: neural_architecture, compression, self_learning, reinforcement_learning, graph_intelligence - Systems: distributed_systems, edge_computing, hardware_acceleration - Frontier: quantum, neuromorphic, bio_computing, cognitive_science, formal_methods - Applied: geopolitics, climate, biomedical, space, finance - Meta: meta_cognition, benchmark Backward compatible — serde snake_case, existing memories still deserialize. Custom(String) still accepted for any unlisted category. Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/types.rs | 92 +++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/crates/mcp-brain-server/src/types.rs b/crates/mcp-brain-server/src/types.rs index b2c115817..d5a486ea6 100644 --- a/crates/mcp-brain-server/src/types.rs +++ b/crates/mcp-brain-server/src/types.rs @@ -21,10 +21,15 @@ impl TemporalSolverStub { } } -/// Brain memory categories +/// Brain memory categories — expanded from 8 to 35 for richer cross-domain discovery. +/// +/// Categories span from practical engineering to exotic frontier research. +/// The `Custom(String)` variant accepts any string for future extensibility. +/// serde uses snake_case: `neural_architecture`, `self_learning`, etc. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] #[serde(rename_all = "snake_case")] pub enum BrainCategory { + // ── Core Engineering ── Architecture, Pattern, Solution, @@ -33,6 +38,67 @@ pub enum BrainCategory { Performance, Tooling, Debug, + + // ── Research & Discovery ── + /// State-of-the-art findings from recent papers (2024-2026) + Sota, + /// First-of-its-kind discoveries — novel connections, methods, or insights + Discovery, + /// Validated hypotheses with evidence chains + Hypothesis, + /// Cross-domain connections between unrelated fields + CrossDomain, + + // ── AI & ML ── + /// Neural architecture innovations (transformers, SSMs, MoE, etc.) + NeuralArchitecture, + /// Quantization, compression, pruning techniques + Compression, + /// Self-learning, online learning, continual learning + SelfLearning, + /// Reinforcement learning, RLHF, reward modeling + ReinforcementLearning, + /// Graph neural networks, knowledge graphs, spectral methods + GraphIntelligence, + + // ── Systems & Infrastructure ── + /// Distributed systems, consensus, replication + DistributedSystems, + /// Edge computing, on-device inference, WASM + EdgeComputing, + /// Hardware-software co-design, FPGA, NPU, ANE + HardwareAcceleration, + + // ── Frontier & Exotic ── + /// Quantum computing, quantum-classical hybrid algorithms + Quantum, + /// Neuromorphic computing, spiking neural networks + Neuromorphic, + /// Biological computing, DNA storage, molecular computing + BioComputing, + /// AGI research, cognitive architectures, consciousness + CognitiveScience, + /// Information theory, complexity theory, formal verification + FormalMethods, + + // ── Applied Domains ── + /// Geopolitics, economics, social dynamics + Geopolitics, + /// Climate, energy, sustainability + Climate, + /// Genomics, drug discovery, medical AI + Biomedical, + /// Space, astronomy, astrophysics + Space, + /// Finance, trading, risk modeling + Finance, + + // ── Meta ── + /// Knowledge about knowledge — epistemology, meta-learning + MetaCognition, + /// Benchmark results, comparative analyses + Benchmark, + Custom(String), } @@ -47,6 +113,30 @@ impl std::fmt::Display for BrainCategory { Self::Performance => write!(f, "performance"), Self::Tooling => write!(f, "tooling"), Self::Debug => write!(f, "debug"), + Self::Sota => write!(f, "sota"), + Self::Discovery => write!(f, "discovery"), + Self::Hypothesis => write!(f, "hypothesis"), + Self::CrossDomain => write!(f, "cross_domain"), + Self::NeuralArchitecture => write!(f, "neural_architecture"), + Self::Compression => write!(f, "compression"), + Self::SelfLearning => write!(f, "self_learning"), + Self::ReinforcementLearning => write!(f, "reinforcement_learning"), + Self::GraphIntelligence => write!(f, "graph_intelligence"), + Self::DistributedSystems => write!(f, "distributed_systems"), + Self::EdgeComputing => write!(f, "edge_computing"), + Self::HardwareAcceleration => write!(f, "hardware_acceleration"), + Self::Quantum => write!(f, "quantum"), + Self::Neuromorphic => write!(f, "neuromorphic"), + Self::BioComputing => write!(f, "bio_computing"), + Self::CognitiveScience => write!(f, "cognitive_science"), + Self::FormalMethods => write!(f, "formal_methods"), + Self::Geopolitics => write!(f, "geopolitics"), + Self::Climate => write!(f, "climate"), + Self::Biomedical => write!(f, "biomedical"), + Self::Space => write!(f, "space"), + Self::Finance => write!(f, "finance"), + Self::MetaCognition => write!(f, "meta_cognition"), + Self::Benchmark => write!(f, "benchmark"), Self::Custom(s) => write!(f, "{s}"), } }