Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.4.1] - 2026-03-14

### Fixed

- **Powi f32 exponent encoding**: `powi(n)` on f32 bytecode tapes silently produced wrong values and gradients for negative exponents (`n <= -2`). The `i32` exponent was stored as `u32` then round-tripped through `f32`, which loses precision for values > 2^24 (all negative exponents). All 5 dispatch sites (forward, reverse, tangent forward, tangent reverse, cross-country) now decode the exponent directly from the raw `u32` via `powi_exp_decode_raw`, bypassing the float conversion entirely.
- **taylor_powi negative base**: `Taylor::powi` and bytecode Taylor-mode produced NaN for negative base values (e.g. `(-2)^3`) because the implementation used `exp(n * ln(a))` which fails for `ln(negative)`. Added `taylor_powi_squaring` using binary exponentiation with `taylor_mul`, dispatched when `a[0] < 0` or `|n| <= 8`.
- **Checkpoint position lookup**: `grad_checkpointed`, `grad_checkpointed_disk`, and `grad_checkpointed_with_hints` used `Vec::contains()` for checkpoint position lookups (O(n) per step). Converted to `HashSet` for O(1) lookups.
- **Nonsmooth Round kink detection**: `forward_nonsmooth` now correctly detects Round kinks at half-integers (0.5, 1.5, ...) instead of at integers, matching the actual discontinuity locations of the `round` function. Updated test to match.

## [0.4.0] - 2026-02-26

### Changed
Expand Down Expand Up @@ -175,7 +184,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Forward-vs-reverse cross-validation on Rosenbrock, Beale, Ackley, Booth, and more
- Criterion benchmarks for forward overhead and reverse gradient

[Unreleased]: https://github.com/Entrolution/echidna/compare/v0.4.0...HEAD
[Unreleased]: https://github.com/Entrolution/echidna/compare/v0.4.1...HEAD
[0.4.1]: https://github.com/Entrolution/echidna/compare/v0.4.0...v0.4.1
[0.4.0]: https://github.com/Entrolution/echidna/compare/v0.3.0...v0.4.0
[0.3.0]: https://github.com/Entrolution/echidna/compare/v0.2.0...v0.3.0
[0.2.0]: https://github.com/Entrolution/echidna/compare/v0.1.0...v0.2.0
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ members = [".", "echidna-optim"]

[package]
name = "echidna"
version = "0.4.0"
version = "0.4.1"
edition = "2021"
rust-version = "1.93"
description = "A high-performance automatic differentiation library for Rust"
Expand Down
6 changes: 3 additions & 3 deletions SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

| Version | Supported |
|---------|-----------|
| 0.4.x | Yes |
| < 0.4 | No |
| >= 0.4.1 | Yes |
| < 0.4.1 | No |

Only the latest minor release receives security updates.
Only the latest patch release receives security updates. Versions prior to 0.4.1 have known correctness bugs (silent wrong results from `powi` on f32 tapes, NaN from `Taylor::powi` with negative base).

## Reporting a Vulnerability

Expand Down
4 changes: 2 additions & 2 deletions echidna-optim/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "echidna-optim"
version = "0.4.0"
version = "0.4.1"
edition = "2021"
rust-version = "1.93"
description = "Optimization solvers and implicit differentiation for echidna"
Expand All @@ -12,7 +12,7 @@ keywords = ["optimization", "differentiation", "implicit", "gradient", "scientif
categories = ["mathematics", "science", "algorithms"]

[dependencies]
echidna = { version = "0.4.0", path = "..", features = ["bytecode"] }
echidna = { version = "0.4.1", path = "..", features = ["bytecode"] }
num-traits = "0.2"
faer = { version = "0.24", optional = true }

Expand Down
6 changes: 3 additions & 3 deletions echidna-optim/src/solvers/lbfgs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ pub fn lbfgs<F: Float, O: Objective<F>>(
if config.memory == 0 || config.convergence.max_iter == 0 {
return OptimResult {
x: x0.to_vec(),
value: F::zero(),
gradient: vec![F::zero(); n],
gradient_norm: F::zero(),
value: F::nan(),
gradient: vec![F::nan(); n],
gradient_norm: F::nan(),
iterations: 0,
func_evals: 0,
termination: TerminationReason::NumericalError,
Expand Down
6 changes: 3 additions & 3 deletions echidna-optim/src/solvers/newton.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ pub fn newton<F: Float, O: Objective<F>>(
if config.convergence.max_iter == 0 {
return OptimResult {
x: x0.to_vec(),
value: F::zero(),
gradient: vec![F::zero(); n],
gradient_norm: F::zero(),
value: F::nan(),
gradient: vec![F::nan(); n],
gradient_norm: F::nan(),
iterations: 0,
func_evals: 0,
termination: TerminationReason::NumericalError,
Expand Down
6 changes: 3 additions & 3 deletions echidna-optim/src/solvers/trust_region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ pub fn trust_region<F: Float, O: Objective<F>>(
{
return OptimResult {
x: x0.to_vec(),
value: F::zero(),
gradient: vec![F::zero(); n],
gradient_norm: F::zero(),
value: F::nan(),
gradient: vec![F::nan(); n],
gradient_norm: F::nan(),
iterations: 0,
func_evals: 0,
termination: TerminationReason::NumericalError,
Expand Down
6 changes: 6 additions & 0 deletions src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ pub fn grad<F: Float + TapeThreadLocal>(
let output = f(&inputs);
drop(guard);

// If the output is a constant (independent of all inputs), the gradient is zero.
if output.index == crate::tape::CONSTANT {
Tape::return_to_pool(tape);
return vec![F::zero(); n];
}

// Run reverse sweep.
let adjoints = tape.reverse(output.index);

Expand Down
24 changes: 20 additions & 4 deletions src/bytecode_tape/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ fn forward_dispatch<F: Float>(
op => {
let [a_idx, b_idx] = arg_indices[i];
let a = values[a_idx as usize];
let b = if b_idx != UNUSED && op != OpCode::Powi {
if op == OpCode::Powi {
let exp = opcode::powi_exp_decode_raw(b_idx);
values[i] = a.powi(exp);
continue;
}
let b = if b_idx != UNUSED {
values[b_idx as usize]
} else if op == OpCode::Powi {
F::from(b_idx).unwrap_or_else(|| F::zero())
} else {
F::zero()
};
Expand Down Expand Up @@ -128,7 +131,7 @@ impl<F: Float> super::BytecodeTape<F> {
branch: if a >= F::zero() { 1 } else { -1 },
});
}
OpCode::Floor | OpCode::Ceil | OpCode::Round | OpCode::Trunc => {
OpCode::Floor | OpCode::Ceil | OpCode::Trunc => {
// Kink at integer values. switching_value = distance to
// nearest integer: zero exactly at kink points, works
// symmetrically for both approach directions.
Expand All @@ -143,6 +146,19 @@ impl<F: Float> super::BytecodeTape<F> {
},
});
}
OpCode::Round => {
// Round has kinks at half-integers (0.5, 1.5, ...),
// not at integers. Shift by 0.5 to measure distance
// to the nearest half-integer.
let half = F::from(0.5).unwrap();
let shifted = a + half;
kinks.push(crate::nonsmooth::KinkEntry {
tape_index: i as u32,
opcode: op,
switching_value: shifted - shifted.round(),
branch: if a - a.floor() < half { 1 } else { -1 },
});
}
_ => unreachable!(),
}
}
Expand Down
53 changes: 41 additions & 12 deletions src/bytecode_tape/optimize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ impl<F: Float> super::BytecodeTape<F> {
if a != UNUSED {
stack.push(a);
}
if b != UNUSED && self.opcodes[i] != OpCode::Powi {
if self.opcodes[i] == OpCode::Custom {
// For Custom ops, b is a callback index, not a tape index.
// The actual second operand (for binary custom ops) is in custom_second_args.
if let Some(&second_arg) = self.custom_second_args.get(&(idx)) {
stack.push(second_arg);
}
} else if b != UNUSED && self.opcodes[i] != OpCode::Powi {
stack.push(b);
}
}
Expand Down Expand Up @@ -62,10 +68,13 @@ impl<F: Float> super::BytecodeTape<F> {
} else {
UNUSED
};
let rb = if b != UNUSED && self.opcodes[read] != OpCode::Powi {
let rb = if b != UNUSED
&& self.opcodes[read] != OpCode::Powi
&& self.opcodes[read] != OpCode::Custom
{
remap[b as usize]
} else {
b
b // Powi: b is encoded exponent; Custom: b is callback index
};
self.arg_indices[write] = [ra, rb];
write += 1;
Expand All @@ -77,6 +86,16 @@ impl<F: Float> super::BytecodeTape<F> {
self.values.truncate(new_len);
self.num_variables = new_len as u32;

// Remap custom_second_args: both keys and values are tape indices.
if !self.custom_second_args.is_empty() {
self.custom_second_args = self
.custom_second_args
.iter()
.filter(|(&k, _)| reachable[k as usize])
.map(|(&k, &v)| (remap[k as usize], remap[v as usize]))
.collect();
}

remap
}

Expand All @@ -97,9 +116,6 @@ impl<F: Float> super::BytecodeTape<F> {
/// `output_indices` contains only the active outputs (remapped), and
/// `output_index` is set to the first active output.
///
/// **Note**: Like `dead_code_elimination`, this does not remap
/// `custom_second_args` (pre-existing limitation).
///
/// # Panics
/// Panics if `active_outputs` is empty.
pub fn dead_code_elimination_for_outputs(&mut self, active_outputs: &[u32]) {
Expand Down Expand Up @@ -142,14 +158,20 @@ impl<F: Float> super::BytecodeTape<F> {
}

let [mut a, mut b] = self.arg_indices[i];
// Apply remap to args (except Powi exponent in b).
// Apply remap to args (except Powi exponent and Custom callback in b).
a = remap[a as usize];
if b != UNUSED && op != OpCode::Powi {
if b != UNUSED && op != OpCode::Powi && op != OpCode::Custom {
b = remap[b as usize];
}
// Update arg_indices with remapped values.
self.arg_indices[i] = [a, b];

// Skip CSE for Custom ops: the key doesn't capture custom_second_args,
// so binary custom ops with different second operands would be incorrectly merged.
if op == OpCode::Custom {
continue;
}

// Build the canonical key.
let key = if b == UNUSED {
// Unary: hash (op, arg0) only; use UNUSED as placeholder.
Expand All @@ -176,14 +198,22 @@ impl<F: Float> super::BytecodeTape<F> {
}
let [a, b] = self.arg_indices[i];
let ra = remap[a as usize];
let rb = if b != UNUSED && op != OpCode::Powi {
let rb = if b != UNUSED && op != OpCode::Powi && op != OpCode::Custom {
remap[b as usize]
} else {
b
b // Powi: b is encoded exponent; Custom: b is callback index
};
self.arg_indices[i] = [ra, rb];
}

// Remap custom_second_args values (keys are not changed by CSE remap,
// but the second operand indices may have been CSE'd).
if !self.custom_second_args.is_empty() {
for v in self.custom_second_args.values_mut() {
*v = remap[*v as usize];
}
}

// Update output indices.
self.output_index = remap[self.output_index as usize];
for oi in &mut self.output_indices {
Expand All @@ -198,8 +228,7 @@ impl<F: Float> super::BytecodeTape<F> {
///
/// In debug builds, validates internal consistency after optimization.
pub fn optimize(&mut self) {
self.cse();
self.dead_code_elimination();
self.cse(); // CSE already calls dead_code_elimination() internally.

// Validate internal consistency in debug builds.
#[cfg(debug_assertions)]
Expand Down
13 changes: 9 additions & 4 deletions src/bytecode_tape/reverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,15 @@ impl<F: Float> super::BytecodeTape<F> {
adjoints[i] = F::zero();
let [a_idx, b_idx] = self.arg_indices[i];
let a = values[a_idx as usize];
let b = if b_idx != UNUSED && op != OpCode::Powi {
if op == OpCode::Powi {
let exp = opcode::powi_exp_decode_raw(b_idx);
let n = F::from(exp).unwrap();
let da = n * a.powi(exp - 1);
adjoints[a_idx as usize] = adjoints[a_idx as usize] + da * adj;
continue;
}
let b = if b_idx != UNUSED {
values[b_idx as usize]
} else if op == OpCode::Powi {
F::from(b_idx).unwrap_or_else(|| F::zero())
} else {
F::zero()
};
Expand All @@ -74,7 +79,7 @@ impl<F: Float> super::BytecodeTape<F> {
};

adjoints[a_idx as usize] = adjoints[a_idx as usize] + da * adj;
if b_idx != UNUSED && op != OpCode::Powi {
if b_idx != UNUSED {
adjoints[b_idx as usize] = adjoints[b_idx as usize] + db * adj;
}
}
Expand Down
22 changes: 15 additions & 7 deletions src/bytecode_tape/tangent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,13 @@ impl<F: Float> super::BytecodeTape<F> {
op => {
let [a_idx, b_idx] = self.arg_indices[i];
let a = buf[a_idx as usize];
let b = if b_idx != UNUSED && op != OpCode::Powi {
if op == OpCode::Powi {
let exp = opcode::powi_exp_decode_raw(b_idx);
buf[i] = a.powi(exp);
continue;
}
let b = if b_idx != UNUSED {
buf[b_idx as usize]
} else if op == OpCode::Powi {
T::from(b_idx).unwrap_or_else(|| T::zero())
} else {
T::zero()
};
Expand Down Expand Up @@ -170,18 +173,23 @@ impl<F: Float> super::BytecodeTape<F> {

let [a_idx, b_idx] = self.arg_indices[i];
let a = tangent_vals[a_idx as usize];
let b = if b_idx != UNUSED && op != OpCode::Powi {
if op == OpCode::Powi {
let exp = opcode::powi_exp_decode_raw(b_idx);
let n = T::from(exp).unwrap();
let da = n * a.powi(exp - 1);
buf[a_idx as usize] = buf[a_idx as usize] + da * adj;
continue;
}
let b = if b_idx != UNUSED {
tangent_vals[b_idx as usize]
} else if op == OpCode::Powi {
T::from(b_idx).unwrap_or_else(|| T::zero())
} else {
T::zero()
};
let r = tangent_vals[i];
let (da, db) = opcode::reverse_partials(op, a, b, r);

buf[a_idx as usize] = buf[a_idx as usize] + da * adj;
if b_idx != UNUSED && op != OpCode::Powi {
if b_idx != UNUSED {
buf[b_idx as usize] = buf[b_idx as usize] + db * adj;
}
}
Expand Down
Loading
Loading