Skip to content

Commit 5a4edc1

Browse files
authored
Merge pull request #259 from ruvnet/feat/adr-092-moe-memory-aware-routing
feat(adr-090-092): Pi-Quantization, INT8 CNN, MoE Memory-Aware Routing
2 parents 61f293e + c53693f commit 5a4edc1

155 files changed

Lines changed: 11453 additions & 2475 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 90 additions & 90 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ members = [
108108
resolver = "2"
109109

110110
[workspace.package]
111-
version = "2.0.5"
111+
version = "2.0.6"
112112
edition = "2021"
113113
rust-version = "1.77"
114114
license = "MIT"

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,8 @@ Everything RuVector can do — organized by category. Vector search, graph queri
706706
| **RuvLTRA Models** | Pre-trained GGUF for routing & embeddings | <10ms inference → [HuggingFace](https://huggingface.co/ruv/ruvltra) |
707707
| **Streaming Tokens** | Real-time token generation | Responsive chat UX |
708708
| **Quantization** | Q4, Q5, Q8 model support | Run 7B models in 4GB RAM |
709+
| **π-Quantization (ADR-090)** | 2-bit weights via π-transform + Hadamard rotation + QAT-STE | **10 GB/s** dequantization, 16x memory reduction |
710+
| **MoE Memory-Aware Routing (ADR-092)** | Cache-aware expert selection with EMA affinity tracking | **70%+ cache hit rate**, <10µs routing latency |
709711

710712
```bash
711713
npm install @ruvector/ruvllm # Node.js
@@ -754,6 +756,7 @@ cargo add ruvector-raft ruvector-cluster ruvector-replication
754756
| Feature | What It Does | Why It Matters |
755757
|---------|--------------|----------------|
756758
| **Tensor Compression** | f32→f16→PQ8→PQ4→Binary | 2-32x memory reduction |
759+
| **INT8 CNN Quantization (ADR-091)** | Quantized Conv2D/Linear/Pooling with SIMD kernels | **4x memory reduction**, 2x faster CNN inference |
757760
| **Differentiable Search** | Soft attention k-NN | End-to-end trainable |
758761
| **Semantic Router** | Route queries to optimal endpoints | Multi-model AI orchestration |
759762
| **Hybrid Routing** | Keyword-first + embedding fallback | **90% accuracy** for agent routing |

crates/neural-trader-coherence/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ impl CoherenceGate for ThresholdGate {
130130
let cut_ok = ctx.mincut_value >= floor;
131131
let cusum_ok = ctx.cusum_score < self.config.cusum_threshold;
132132
let drift_ok = ctx.drift_score < self.config.max_drift_score;
133-
let boundary_ok =
134-
ctx.boundary_stable_count >= self.config.boundary_stability_windows;
133+
let boundary_ok = ctx.boundary_stable_count >= self.config.boundary_stability_windows;
135134
// Learning requires tighter drift margin (half the max).
136135
let learn_drift_ok = ctx.drift_score < self.config.max_drift_score * 0.5;
137136

crates/neural-trader-replay/src/lib.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,8 @@ pub trait MemoryStore {
8484

8585
/// Attempts to write a segment. Returns `true` if the gate allowed
8686
/// admission, `false` if rejected.
87-
fn maybe_write(
88-
&mut self,
89-
seg: ReplaySegment,
90-
gate: &CoherenceDecision,
91-
) -> anyhow::Result<bool>;
87+
fn maybe_write(&mut self, seg: ReplaySegment, gate: &CoherenceDecision)
88+
-> anyhow::Result<bool>;
9289
}
9390

9491
// ---------------------------------------------------------------------------

crates/neural-trader-wasm/src/lib.rs

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,18 @@ fn bytes16_to_hex(b: &[u8; 16]) -> String {
4545
fn hex_to_bytes16_inner(s: &str) -> Result<[u8; 16], String> {
4646
let s = s.trim();
4747
// Strip optional 0x prefix for JS ergonomics.
48-
let s = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")).unwrap_or(s);
48+
let s = s
49+
.strip_prefix("0x")
50+
.or_else(|| s.strip_prefix("0X"))
51+
.unwrap_or(s);
4952
if !s.is_ascii() || s.len() != 32 {
5053
return Err(
5154
"hex string must be exactly 32 ASCII hex chars (optional 0x prefix)".to_string(),
5255
);
5356
}
5457
let mut out = [0u8; 16];
5558
for (i, byte) in out.iter_mut().enumerate() {
56-
*byte = u8::from_str_radix(&s[i * 2..i * 2 + 2], 16)
57-
.map_err(|e| e.to_string())?;
59+
*byte = u8::from_str_radix(&s[i * 2..i * 2 + 2], 16).map_err(|e| e.to_string())?;
5860
}
5961
Ok(out)
6062
}
@@ -66,7 +68,8 @@ fn hex_to_bytes16(s: &str) -> Result<[u8; 16], JsValue> {
6668
/// Serialize using BigInt-aware serializer to avoid u64 precision loss.
6769
fn to_js<T: Serialize>(v: &T) -> Result<JsValue, JsValue> {
6870
let ser = serde_wasm_bindgen::Serializer::new().serialize_large_number_types_as_bigints(true);
69-
v.serialize(&ser).map_err(|e| JsValue::from_str(&e.to_string()))
71+
v.serialize(&ser)
72+
.map_err(|e| JsValue::from_str(&e.to_string()))
7073
}
7174

7275
// ---------------------------------------------------------------------------
@@ -143,8 +146,16 @@ enum_convert!(SegmentKindWasm <=> neural_trader_replay::SegmentKind {
143146
#[wasm_bindgen]
144147
#[derive(Clone, Copy, Debug)]
145148
pub enum NodeKindWasm {
146-
Symbol = 0, Venue = 1, PriceLevel = 2, Order = 3, Trade = 4,
147-
Event = 5, Participant = 6, TimeBucket = 7, Regime = 8, StrategyState = 9,
149+
Symbol = 0,
150+
Venue = 1,
151+
PriceLevel = 2,
152+
Order = 3,
153+
Trade = 4,
154+
Event = 5,
155+
Participant = 6,
156+
TimeBucket = 7,
157+
Regime = 8,
158+
StrategyState = 9,
148159
}
149160
enum_convert!(NodeKindWasm <=> neural_trader_core::NodeKind {
150161
Symbol, Venue, PriceLevel, Order, Trade, Event, Participant,
@@ -154,9 +165,18 @@ enum_convert!(NodeKindWasm <=> neural_trader_core::NodeKind {
154165
#[wasm_bindgen]
155166
#[derive(Clone, Copy, Debug)]
156167
pub enum EdgeKindWasm {
157-
AtLevel = 0, NextTick = 1, Generated = 2, Matched = 3, ModifiedFrom = 4,
158-
CanceledBy = 5, BelongsToSymbol = 6, OnVenue = 7, InWindow = 8,
159-
CorrelatedWith = 9, InRegime = 10, AffectsState = 11,
168+
AtLevel = 0,
169+
NextTick = 1,
170+
Generated = 2,
171+
Matched = 3,
172+
ModifiedFrom = 4,
173+
CanceledBy = 5,
174+
BelongsToSymbol = 6,
175+
OnVenue = 7,
176+
InWindow = 8,
177+
CorrelatedWith = 9,
178+
InRegime = 10,
179+
AffectsState = 11,
160180
}
161181
enum_convert!(EdgeKindWasm <=> neural_trader_core::EdgeKind {
162182
AtLevel, NextTick, Generated, Matched, ModifiedFrom, CanceledBy,
@@ -774,11 +794,7 @@ impl ReservoirStoreWasm {
774794

775795
/// Retrieve segments matching a symbol, returned as JSON array.
776796
#[wasm_bindgen(js_name = "retrieveBySymbol")]
777-
pub fn retrieve_by_symbol(
778-
&self,
779-
symbol_id: u32,
780-
limit: usize,
781-
) -> Result<JsValue, JsValue> {
797+
pub fn retrieve_by_symbol(&self, symbol_id: u32, limit: usize) -> Result<JsValue, JsValue> {
782798
let query = neural_trader_replay::MemoryQuery {
783799
symbol_id,
784800
embedding: vec![],

crates/ruvector-cnn-wasm/src/lib.rs

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
1111
#![allow(clippy::new_without_default)]
1212

13-
use wasm_bindgen::prelude::*;
14-
use ruvector_cnn::contrastive::{InfoNCELoss as RustInfoNCE, TripletLoss as RustTriplet, TripletDistance};
13+
use ruvector_cnn::contrastive::{
14+
InfoNCELoss as RustInfoNCE, TripletDistance, TripletLoss as RustTriplet,
15+
};
1516
use ruvector_cnn::simd;
17+
use wasm_bindgen::prelude::*;
1618

1719
/// Initialize panic hook for better error messages
1820
#[wasm_bindgen(start)]
@@ -94,9 +96,8 @@ impl WasmCnnEmbedder {
9496
let mean: f32 = channel_data.iter().sum::<f32>() / pixels_per_channel as f32;
9597

9698
// Variance
97-
let variance: f32 = channel_data.iter()
98-
.map(|x| (x - mean).powi(2))
99-
.sum::<f32>() / pixels_per_channel as f32;
99+
let variance: f32 = channel_data.iter().map(|x| (x - mean).powi(2)).sum::<f32>()
100+
/ pixels_per_channel as f32;
100101

101102
// Store in embedding
102103
if c * 2 < self.embedding_dim {
@@ -195,7 +196,12 @@ impl WasmInfoNCELoss {
195196
/// Compute loss for a batch of embedding pairs
196197
/// embeddings: [2N, D] flattened where (i, i+N) are positive pairs
197198
#[wasm_bindgen]
198-
pub fn forward(&self, embeddings: &[f32], batch_size: usize, dim: usize) -> Result<f32, JsValue> {
199+
pub fn forward(
200+
&self,
201+
embeddings: &[f32],
202+
batch_size: usize,
203+
dim: usize,
204+
) -> Result<f32, JsValue> {
199205
if embeddings.len() != 2 * batch_size * dim {
200206
return Err(JsValue::from_str(&format!(
201207
"Expected {} elements, got {}",
@@ -269,17 +275,29 @@ impl WasmTripletLoss {
269275
negatives: &[f32],
270276
dim: usize,
271277
) -> Result<f32, JsValue> {
272-
if anchors.len() % dim != 0 || positives.len() != anchors.len() || negatives.len() != anchors.len() {
278+
if anchors.len() % dim != 0
279+
|| positives.len() != anchors.len()
280+
|| negatives.len() != anchors.len()
281+
{
273282
return Err(JsValue::from_str("Invalid triplet dimensions"));
274283
}
275284

276285
let batch_size = anchors.len() / dim;
277286
let mut total_loss = 0.0f64;
278287

279288
for i in 0..batch_size {
280-
let a: Vec<f64> = anchors[i * dim..(i + 1) * dim].iter().map(|&x| x as f64).collect();
281-
let p: Vec<f64> = positives[i * dim..(i + 1) * dim].iter().map(|&x| x as f64).collect();
282-
let n: Vec<f64> = negatives[i * dim..(i + 1) * dim].iter().map(|&x| x as f64).collect();
289+
let a: Vec<f64> = anchors[i * dim..(i + 1) * dim]
290+
.iter()
291+
.map(|&x| x as f64)
292+
.collect();
293+
let p: Vec<f64> = positives[i * dim..(i + 1) * dim]
294+
.iter()
295+
.map(|&x| x as f64)
296+
.collect();
297+
let n: Vec<f64> = negatives[i * dim..(i + 1) * dim]
298+
.iter()
299+
.map(|&x| x as f64)
300+
.collect();
283301
total_loss += self.inner.forward(&a, &p, &n);
284302
}
285303

@@ -351,14 +369,28 @@ impl LayerOps {
351369
) -> Vec<f32> {
352370
let channels = gamma.len();
353371
let mut output = vec![0.0f32; input.len()];
354-
simd::batch_norm_simd(input, &mut output, gamma, beta, mean, var, epsilon, channels);
372+
simd::batch_norm_simd(
373+
input,
374+
&mut output,
375+
gamma,
376+
beta,
377+
mean,
378+
var,
379+
epsilon,
380+
channels,
381+
);
355382
output
356383
}
357384

358385
/// Apply global average pooling
359386
/// Returns one value per channel
360387
#[wasm_bindgen]
361-
pub fn global_avg_pool(input: &[f32], height: usize, width: usize, channels: usize) -> Vec<f32> {
388+
pub fn global_avg_pool(
389+
input: &[f32],
390+
height: usize,
391+
width: usize,
392+
channels: usize,
393+
) -> Vec<f32> {
362394
let mut output = vec![0.0f32; channels];
363395
simd::global_avg_pool_simd(input, &mut output, height, width, channels);
364396
output
@@ -382,7 +414,8 @@ mod tests {
382414
input_size: 8,
383415
embedding_dim: 64,
384416
normalize: true,
385-
})).unwrap();
417+
}))
418+
.unwrap();
386419

387420
let image_data = vec![128u8; 8 * 8 * 3];
388421
let embedding = embedder.extract(&image_data, 8, 8).unwrap();

0 commit comments

Comments (0)