From d1e8e8aa1be735ac4733a5815d1d4083d71099cd Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Thu, 12 Mar 2026 23:33:44 -0700
Subject: [PATCH] Formatting: re-indent and re-wrap NAM sources, tools, and tests

---
 NAM/conv1d.cpp                           | 355 +++++++++++------------
 NAM/convnet.cpp                          |   4 +-
 NAM/dsp.cpp                              | 115 ++++----
 NAM/dsp.h                                |   4 +-
 NAM/film.h                               |   4 +-
 NAM/gating_activations.h                 |   4 +-
 NAM/lstm.cpp                             |   4 +-
 tools/render.cpp                         |   8 +-
 tools/test/test_container.cpp            |  28 +-
 tools/test/test_noncontiguous_blocks.cpp |  16 +-
 10 files changed, 273 insertions(+), 269 deletions(-)

diff --git a/NAM/conv1d.cpp b/NAM/conv1d.cpp
index f8ec91b3..b561786c 100644
--- a/NAM/conv1d.cpp
+++ b/NAM/conv1d.cpp
@@ -272,9 +272,9 @@ void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames)
     {
       // Fused 4x4 kernel_size=3: read all 3 input blocks and compute in one pass
       const long dil = this->_dilation;
-      auto in0 = _input_buffer.Read(num_frames, 2 * dil);  // oldest (k=0)
-      auto in1 = _input_buffer.Read(num_frames, dil);      // middle (k=1)
-      auto in2 = _input_buffer.Read(num_frames, 0);        // newest (k=2)
+      auto in0 = _input_buffer.Read(num_frames, 2 * dil); // oldest (k=0)
+      auto in1 = _input_buffer.Read(num_frames, dil); // middle (k=1)
+      auto in2 = _input_buffer.Read(num_frames, 0); // newest (k=2)
 
       const float* __restrict__ in0_ptr = in0.data();
       const float* __restrict__ in1_ptr = in1.data();
@@ -282,7 +282,7 @@ void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames)
       float* __restrict__ output_ptr = _output.data();
 
       // Get weight pointers for all 3 taps
-      const size_t wsize = 16;  // 4x4
+      const size_t wsize = 16; // 4x4
       const float* __restrict__ w0 = this->_weight[0].data();
       const float* __restrict__ w1 = this->_weight[1].data();
       const float* __restrict__ w2 = this->_weight[2].data();
@@ -371,10 +371,7 @@ void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames)
       auto in4 = _input_buffer.Read(num_frames, dil);
       auto in5 = _input_buffer.Read(num_frames, 0);
 
-      const float* __restrict__ in_ptrs[6] = {
-        in0.data(), in1.data(), in2.data(),
-        in3.data(), in4.data(), in5.data()
-      };
+      const float* __restrict__ in_ptrs[6] = {in0.data(), in1.data(), in2.data(), in3.data(), in4.data(), in5.data()};
       float* __restrict__ output_ptr = _output.data();
 
       // Cache all 54 weights on stack (6 taps x 3x3 matrix, column-major)
@@ -408,207 +405,207 @@ void Conv1D::Process(const Eigen::MatrixXf& input, const int num_frames)
     }
     else
     {
-    // General inline GEMM path uses += accumulation, so needs setZero
-    _output.leftCols(num_frames).setZero();
+      // General inline GEMM path uses += accumulation, so needs setZero
+      _output.leftCols(num_frames).setZero();
 
-    // General inline GEMM path for other configurations
-    for (size_t k = 0; k < kernel_size; k++)
-    {
-      const long offset = this->_dilation * (k + 1 - (long)kernel_size);
-      const long lookback = -offset;
-      auto input_block = _input_buffer.Read(num_frames, lookback);
+      // General inline GEMM path for other configurations
+      for (size_t k = 0; k < kernel_size; k++)
+      {
+        const long offset = this->_dilation * (k + 1 - (long)kernel_size);
+        const long lookback = -offset;
+        auto input_block = _input_buffer.Read(num_frames, lookback);
 
-      const float* __restrict__ input_ptr = input_block.data();
-      const float* __restrict__ weight_ptr = this->_weight[k].data();
-      float* __restrict__ output_ptr = _output.data();
+        const float* __restrict__ input_ptr = input_block.data();
+        const float* __restrict__ weight_ptr = this->_weight[k].data();
+        float* __restrict__ output_ptr = _output.data();
 
-      // Specialized fully-unrolled paths for common small channel counts
-      // These avoid all loop overhead for the tiny matrices in NAM models
-      if (out_ch == 2 && in_ch == 2)
-      {
-        // 2x2 fully unrolled
-        const float w00 = weight_ptr[0], w10 = weight_ptr[1];
-        const float w01 = weight_ptr[2], w11 = weight_ptr[3];
-        for (int f = 0; f < num_frames; f++)
+        // Specialized fully-unrolled paths for common small channel counts
+        // These avoid all loop overhead for the tiny matrices in NAM models
+        if (out_ch == 2 && in_ch == 2)
         {
-          const float i0 = input_ptr[f * 2];
-          const float i1 = input_ptr[f * 2 + 1];
-          output_ptr[f * 2] += w00 * i0 + w01 * i1;
-          output_ptr[f * 2 + 1] += w10 * i0 + w11 * i1;
+          // 2x2 fully unrolled
+          const float w00 = weight_ptr[0], w10 = weight_ptr[1];
+          const float w01 = weight_ptr[2], w11 = weight_ptr[3];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float i0 = input_ptr[f * 2];
+            const float i1 = input_ptr[f * 2 + 1];
+            output_ptr[f * 2] += w00 * i0 + w01 * i1;
+            output_ptr[f * 2 + 1] += w10 * i0 + w11 * i1;
+          }
         }
-      }
-      else if (out_ch == 2 && in_ch == 4)
-      {
-        // 2x4 fully unrolled
-        const float w00 = weight_ptr[0], w10 = weight_ptr[1];
-        const float w01 = weight_ptr[2], w11 = weight_ptr[3];
-        const float w02 = weight_ptr[4], w12 = weight_ptr[5];
-        const float w03 = weight_ptr[6], w13 = weight_ptr[7];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 2 && in_ch == 4)
         {
-          const float i0 = input_ptr[f * 4];
-          const float i1 = input_ptr[f * 4 + 1];
-          const float i2 = input_ptr[f * 4 + 2];
-          const float i3 = input_ptr[f * 4 + 3];
-          output_ptr[f * 2] += w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
-          output_ptr[f * 2 + 1] += w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
+          // 2x4 fully unrolled
+          const float w00 = weight_ptr[0], w10 = weight_ptr[1];
+          const float w01 = weight_ptr[2], w11 = weight_ptr[3];
+          const float w02 = weight_ptr[4], w12 = weight_ptr[5];
+          const float w03 = weight_ptr[6], w13 = weight_ptr[7];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float i0 = input_ptr[f * 4];
+            const float i1 = input_ptr[f * 4 + 1];
+            const float i2 = input_ptr[f * 4 + 2];
+            const float i3 = input_ptr[f * 4 + 3];
+            output_ptr[f * 2] += w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
+            output_ptr[f * 2 + 1] += w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
+          }
         }
-      }
-      else if (out_ch == 4 && in_ch == 1)
-      {
-        // 4x1 fully unrolled
-        const float w0 = weight_ptr[0], w1 = weight_ptr[1];
-        const float w2 = weight_ptr[2], w3 = weight_ptr[3];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 4 && in_ch == 1)
         {
-          const float in_val = input_ptr[f];
-          output_ptr[f * 4] += w0 * in_val;
-          output_ptr[f * 4 + 1] += w1 * in_val;
-          output_ptr[f * 4 + 2] += w2 * in_val;
-          output_ptr[f * 4 + 3] += w3 * in_val;
+          // 4x1 fully unrolled
+          const float w0 = weight_ptr[0], w1 = weight_ptr[1];
+          const float w2 = weight_ptr[2], w3 = weight_ptr[3];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float in_val = input_ptr[f];
+            output_ptr[f * 4] += w0 * in_val;
+            output_ptr[f * 4 + 1] += w1 * in_val;
+            output_ptr[f * 4 + 2] += w2 * in_val;
+            output_ptr[f * 4 + 3] += w3 * in_val;
+          }
         }
-      }
-      else if (out_ch == 4 && in_ch == 4)
-      {
-        // 4x4 fully unrolled - cache weights in registers
-        const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
-        const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
-        const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
-        const float w03 = weight_ptr[12], w13 = weight_ptr[13], w23 = weight_ptr[14], w33 = weight_ptr[15];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 4 && in_ch == 4)
         {
-          const int in_off = f * 4;
-          const int out_off = f * 4;
-          const float i0 = input_ptr[in_off];
-          const float i1 = input_ptr[in_off + 1];
-          const float i2 = input_ptr[in_off + 2];
-          const float i3 = input_ptr[in_off + 3];
-          output_ptr[out_off] += w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
-          output_ptr[out_off + 1] += w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
-          output_ptr[out_off + 2] += w20 * i0 + w21 * i1 + w22 * i2 + w23 * i3;
-          output_ptr[out_off + 3] += w30 * i0 + w31 * i1 + w32 * i2 + w33 * i3;
+          // 4x4 fully unrolled - cache weights in registers
+          const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
+          const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
+          const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
+          const float w03 = weight_ptr[12], w13 = weight_ptr[13], w23 = weight_ptr[14], w33 = weight_ptr[15];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const int in_off = f * 4;
+            const int out_off = f * 4;
+            const float i0 = input_ptr[in_off];
+            const float i1 = input_ptr[in_off + 1];
+            const float i2 = input_ptr[in_off + 2];
+            const float i3 = input_ptr[in_off + 3];
+            output_ptr[out_off] += w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
+            output_ptr[out_off + 1] += w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
+            output_ptr[out_off + 2] += w20 * i0 + w21 * i1 + w22 * i2 + w23 * i3;
+            output_ptr[out_off + 3] += w30 * i0 + w31 * i1 + w32 * i2 + w33 * i3;
+          }
         }
-      }
-      else if (out_ch == 3 && in_ch == 1)
-      {
-        // 3x1 fully unrolled
-        const float w0 = weight_ptr[0], w1 = weight_ptr[1], w2 = weight_ptr[2];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 3 && in_ch == 1)
         {
-          const float in_val = input_ptr[f];
-          output_ptr[f * 3] += w0 * in_val;
-          output_ptr[f * 3 + 1] += w1 * in_val;
-          output_ptr[f * 3 + 2] += w2 * in_val;
+          // 3x1 fully unrolled
+          const float w0 = weight_ptr[0], w1 = weight_ptr[1], w2 = weight_ptr[2];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float in_val = input_ptr[f];
+            output_ptr[f * 3] += w0 * in_val;
+            output_ptr[f * 3 + 1] += w1 * in_val;
+            output_ptr[f * 3 + 2] += w2 * in_val;
+          }
         }
-      }
-      else if (out_ch == 3 && in_ch == 3)
-      {
-        // 3x3 fully unrolled
-        const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2];
-        const float w01 = weight_ptr[3], w11 = weight_ptr[4], w21 = weight_ptr[5];
-        const float w02 = weight_ptr[6], w12 = weight_ptr[7], w22 = weight_ptr[8];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 3 && in_ch == 3)
         {
-          const int off = f * 3;
-          const float i0 = input_ptr[off];
-          const float i1 = input_ptr[off + 1];
-          const float i2 = input_ptr[off + 2];
-          output_ptr[off] += w00 * i0 + w01 * i1 + w02 * i2;
-          output_ptr[off + 1] += w10 * i0 + w11 * i1 + w12 * i2;
-          output_ptr[off + 2] += w20 * i0 + w21 * i1 + w22 * i2;
+          // 3x3 fully unrolled
+          const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2];
+          const float w01 = weight_ptr[3], w11 = weight_ptr[4], w21 = weight_ptr[5];
+          const float w02 = weight_ptr[6], w12 = weight_ptr[7], w22 = weight_ptr[8];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const int off = f * 3;
+            const float i0 = input_ptr[off];
+            const float i1 = input_ptr[off + 1];
+            const float i2 = input_ptr[off + 2];
+            output_ptr[off] += w00 * i0 + w01 * i1 + w02 * i2;
+            output_ptr[off + 1] += w10 * i0 + w11 * i1 + w12 * i2;
+            output_ptr[off + 2] += w20 * i0 + w21 * i1 + w22 * i2;
+          }
         }
-      }
-      else if (out_ch == 4 && in_ch == 3)
-      {
-        // 4x3 fully unrolled
-        const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
-        const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
-        const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 4 && in_ch == 3)
         {
-          const float i0 = input_ptr[f * 3];
-          const float i1 = input_ptr[f * 3 + 1];
-          const float i2 = input_ptr[f * 3 + 2];
-          output_ptr[f * 4] += w00 * i0 + w01 * i1 + w02 * i2;
-          output_ptr[f * 4 + 1] += w10 * i0 + w11 * i1 + w12 * i2;
-          output_ptr[f * 4 + 2] += w20 * i0 + w21 * i1 + w22 * i2;
-          output_ptr[f * 4 + 3] += w30 * i0 + w31 * i1 + w32 * i2;
+          // 4x3 fully unrolled
+          const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
+          const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
+          const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float i0 = input_ptr[f * 3];
+            const float i1 = input_ptr[f * 3 + 1];
+            const float i2 = input_ptr[f * 3 + 2];
+            output_ptr[f * 4] += w00 * i0 + w01 * i1 + w02 * i2;
+            output_ptr[f * 4 + 1] += w10 * i0 + w11 * i1 + w12 * i2;
+            output_ptr[f * 4 + 2] += w20 * i0 + w21 * i1 + w22 * i2;
+            output_ptr[f * 4 + 3] += w30 * i0 + w31 * i1 + w32 * i2;
+          }
         }
-      }
-      else if (out_ch == 3 && in_ch == 4)
-      {
-        // 3x4 fully unrolled
-        const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2];
-        const float w01 = weight_ptr[3], w11 = weight_ptr[4], w21 = weight_ptr[5];
-        const float w02 = weight_ptr[6], w12 = weight_ptr[7], w22 = weight_ptr[8];
-        const float w03 = weight_ptr[9], w13 = weight_ptr[10], w23 = weight_ptr[11];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 3 && in_ch == 4)
         {
-          const float i0 = input_ptr[f * 4];
-          const float i1 = input_ptr[f * 4 + 1];
-          const float i2 = input_ptr[f * 4 + 2];
-          const float i3 = input_ptr[f * 4 + 3];
-          output_ptr[f * 3] += w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
-          output_ptr[f * 3 + 1] += w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
-          output_ptr[f * 3 + 2] += w20 * i0 + w21 * i1 + w22 * i2 + w23 * i3;
+          // 3x4 fully unrolled
+          const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2];
+          const float w01 = weight_ptr[3], w11 = weight_ptr[4], w21 = weight_ptr[5];
+          const float w02 = weight_ptr[6], w12 = weight_ptr[7], w22 = weight_ptr[8];
+          const float w03 = weight_ptr[9], w13 = weight_ptr[10], w23 = weight_ptr[11];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float i0 = input_ptr[f * 4];
+            const float i1 = input_ptr[f * 4 + 1];
+            const float i2 = input_ptr[f * 4 + 2];
+            const float i3 = input_ptr[f * 4 + 3];
+            output_ptr[f * 3] += w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
+            output_ptr[f * 3 + 1] += w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
+            output_ptr[f * 3 + 2] += w20 * i0 + w21 * i1 + w22 * i2 + w23 * i3;
+          }
         }
-      }
-      else if (out_ch == 6 && in_ch == 1)
-      {
-        // 6x1 fully unrolled
-        const float w0 = weight_ptr[0], w1 = weight_ptr[1], w2 = weight_ptr[2];
-        const float w3 = weight_ptr[3], w4 = weight_ptr[4], w5 = weight_ptr[5];
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 6 && in_ch == 1)
         {
-          const float in_val = input_ptr[f];
-          const int off = f * 6;
-          output_ptr[off] += w0 * in_val;
-          output_ptr[off + 1] += w1 * in_val;
-          output_ptr[off + 2] += w2 * in_val;
-          output_ptr[off + 3] += w3 * in_val;
-          output_ptr[off + 4] += w4 * in_val;
-          output_ptr[off + 5] += w5 * in_val;
+          // 6x1 fully unrolled
+          const float w0 = weight_ptr[0], w1 = weight_ptr[1], w2 = weight_ptr[2];
+          const float w3 = weight_ptr[3], w4 = weight_ptr[4], w5 = weight_ptr[5];
+          for (int f = 0; f < num_frames; f++)
+          {
+            const float in_val = input_ptr[f];
+            const int off = f * 6;
+            output_ptr[off] += w0 * in_val;
+            output_ptr[off + 1] += w1 * in_val;
+            output_ptr[off + 2] += w2 * in_val;
+            output_ptr[off + 3] += w3 * in_val;
+            output_ptr[off + 4] += w4 * in_val;
+            output_ptr[off + 5] += w5 * in_val;
+          }
         }
-      }
-      else if (out_ch == 6 && in_ch == 6)
-      {
-        // 6x6 - unroll weights, loop over frames
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 6 && in_ch == 6)
         {
-          const float* __restrict__ in_col = input_ptr + f * 6;
-          float* __restrict__ out_col = output_ptr + f * 6;
-          const float i0 = in_col[0], i1 = in_col[1], i2 = in_col[2];
-          const float i3 = in_col[3], i4 = in_col[4], i5 = in_col[5];
-          for (int o = 0; o < 6; o++)
+          // 6x6 - unroll weights, loop over frames
+          for (int f = 0; f < num_frames; f++)
           {
-            out_col[o] += weight_ptr[o] * i0 + weight_ptr[6 + o] * i1 + weight_ptr[12 + o] * i2
-                          + weight_ptr[18 + o] * i3 + weight_ptr[24 + o] * i4 + weight_ptr[30 + o] * i5;
+            const float* __restrict__ in_col = input_ptr + f * 6;
+            float* __restrict__ out_col = output_ptr + f * 6;
+            const float i0 = in_col[0], i1 = in_col[1], i2 = in_col[2];
+            const float i3 = in_col[3], i4 = in_col[4], i5 = in_col[5];
+            for (int o = 0; o < 6; o++)
+            {
+              out_col[o] += weight_ptr[o] * i0 + weight_ptr[6 + o] * i1 + weight_ptr[12 + o] * i2
+                            + weight_ptr[18 + o] * i3 + weight_ptr[24 + o] * i4 + weight_ptr[30 + o] * i5;
+            }
           }
         }
-      }
-      else if (out_ch == 8 && in_ch == 8)
-      {
-        // 8x8 - unroll weights, loop over frames
-        for (int f = 0; f < num_frames; f++)
+        else if (out_ch == 8 && in_ch == 8)
         {
-          const float* __restrict__ in_col = input_ptr + f * 8;
-          float* __restrict__ out_col = output_ptr + f * 8;
-          const float i0 = in_col[0], i1 = in_col[1], i2 = in_col[2], i3 = in_col[3];
-          const float i4 = in_col[4], i5 = in_col[5], i6 = in_col[6], i7 = in_col[7];
-          for (int o = 0; o < 8; o++)
+          // 8x8 - unroll weights, loop over frames
+          for (int f = 0; f < num_frames; f++)
           {
-            out_col[o] += weight_ptr[o] * i0 + weight_ptr[8 + o] * i1 + weight_ptr[16 + o] * i2
-                          + weight_ptr[24 + o] * i3 + weight_ptr[32 + o] * i4 + weight_ptr[40 + o] * i5
-                          + weight_ptr[48 + o] * i6 + weight_ptr[56 + o] * i7;
+            const float* __restrict__ in_col = input_ptr + f * 8;
+            float* __restrict__ out_col = output_ptr + f * 8;
+            const float i0 = in_col[0], i1 = in_col[1], i2 = in_col[2], i3 = in_col[3];
+            const float i4 = in_col[4], i5 = in_col[5], i6 = in_col[6], i7 = in_col[7];
+            for (int o = 0; o < 8; o++)
+            {
+              out_col[o] += weight_ptr[o] * i0 + weight_ptr[8 + o] * i1 + weight_ptr[16 + o] * i2
+                            + weight_ptr[24 + o] * i3 + weight_ptr[32 + o] * i4 + weight_ptr[40 + o] * i5
+                            + weight_ptr[48 + o] * i6 + weight_ptr[56 + o] * i7;
+            }
           }
         }
+        else
+        {
+          // Fall back to Eigen for larger matrices where it's more efficient
+          _output.leftCols(num_frames).noalias() += this->_weight[k] * input_block;
+        }
       }
-      else
-      {
-        // Fall back to Eigen for larger matrices where it's more efficient
-        _output.leftCols(num_frames).noalias() += this->_weight[k] * input_block;
-      }
-    }
     } // end else (general GEMM path)
 #else
     // Eigen fallback uses += accumulation, so needs setZero
diff --git a/NAM/convnet.cpp b/NAM/convnet.cpp
index 0a9e2758..329caf61 100644
--- a/NAM/convnet.cpp
+++ b/NAM/convnet.cpp
@@ -341,8 +341,8 @@ nam::convnet::ConvNetConfig nam::convnet::parse_config_json(const nlohmann::json
 // ConvNetConfig::create()
 std::unique_ptr<nam::DSP> nam::convnet::ConvNetConfig::create(std::vector<float> weights, double sampleRate)
 {
-  return std::make_unique<nam::convnet::ConvNet>(in_channels, out_channels, channels, dilations, batchnorm, activation,
-                                                 weights, sampleRate, groups);
+  return std::make_unique<nam::convnet::ConvNet>(
+    in_channels, out_channels, channels, dilations, batchnorm, activation, weights, sampleRate, groups);
 }
 
 // Config parser for ConfigParserRegistry
diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 3aa22304..e975001b 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -498,7 +498,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
       for (int f = 0; f < num_frames; f++)
       {
         const float in_val = input_ptr[f * in_stride];
-        output_ptr[f * 2]     = w0 * in_val;
+        output_ptr[f * 2] = w0 * in_val;
         output_ptr[f * 2 + 1] = w1 * in_val;
       }
     }
@@ -508,7 +508,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
       for (int f = 0; f < num_frames; f++)
       {
         const float in_val = input_ptr[f * in_stride];
-        output_ptr[f * 3]     = w0 * in_val;
+        output_ptr[f * 3] = w0 * in_val;
         output_ptr[f * 3 + 1] = w1 * in_val;
         output_ptr[f * 3 + 2] = w2 * in_val;
       }
@@ -520,7 +520,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
       for (int f = 0; f < num_frames; f++)
       {
         const float in_val = input_ptr[f * in_stride];
-        output_ptr[f * 4]     = w0 * in_val;
+        output_ptr[f * 4] = w0 * in_val;
         output_ptr[f * 4 + 1] = w1 * in_val;
         output_ptr[f * 4 + 2] = w2 * in_val;
         output_ptr[f * 4 + 3] = w3 * in_val;
@@ -567,7 +567,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         const float* __restrict__ in_col = input_ptr + f * in_stride;
         const float i0 = in_col[0];
         const float i1 = in_col[1];
-        output_ptr[f * 2]     = w00 * i0 + w01 * i1;
+        output_ptr[f * 2] = w00 * i0 + w01 * i1;
         output_ptr[f * 2 + 1] = w10 * i0 + w11 * i1;
       }
     }
@@ -584,7 +584,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         const float i1 = in_col[1];
         const float i2 = in_col[2];
         const float i3 = in_col[3];
-        output_ptr[f * 2]     = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
+        output_ptr[f * 2] = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
         output_ptr[f * 2 + 1] = w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
       }
     }
@@ -595,8 +595,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
       for (int f = 0; f < num_frames; f++)
       {
         const float* __restrict__ in_col = input_ptr + f * in_stride;
-        output_ptr[f] = w0 * in_col[0] + w1 * in_col[1]
-                      + w2 * in_col[2] + w3 * in_col[3];
+        output_ptr[f] = w0 * in_col[0] + w1 * in_col[1] + w2 * in_col[2] + w3 * in_col[3];
       }
     }
     else if (out_ch == 4 && in_ch == 2)
@@ -608,7 +607,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         const float* __restrict__ in_col = input_ptr + f * in_stride;
         const float i0 = in_col[0];
         const float i1 = in_col[1];
-        output_ptr[f * 4]     = w00 * i0 + w01 * i1;
+        output_ptr[f * 4] = w00 * i0 + w01 * i1;
         output_ptr[f * 4 + 1] = w10 * i0 + w11 * i1;
         output_ptr[f * 4 + 2] = w20 * i0 + w21 * i1;
         output_ptr[f * 4 + 3] = w30 * i0 + w31 * i1;
@@ -628,7 +627,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
           const float i0 = in_col[0];
           const float i1 = in_col[1];
           const float i2 = in_col[2];
-          output_ptr[f * 3]     = w00 * i0 + w01 * i1 + w02 * i2 + b0;
+          output_ptr[f * 3] = w00 * i0 + w01 * i1 + w02 * i2 + b0;
           output_ptr[f * 3 + 1] = w10 * i0 + w11 * i1 + w12 * i2 + b1;
           output_ptr[f * 3 + 2] = w20 * i0 + w21 * i1 + w22 * i2 + b2;
         }
@@ -642,7 +641,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
           const float i0 = in_col[0];
           const float i1 = in_col[1];
           const float i2 = in_col[2];
-          output_ptr[f * 3]     = w00 * i0 + w01 * i1 + w02 * i2;
+          output_ptr[f * 3] = w00 * i0 + w01 * i1 + w02 * i2;
           output_ptr[f * 3 + 1] = w10 * i0 + w11 * i1 + w12 * i2;
           output_ptr[f * 3 + 2] = w20 * i0 + w21 * i1 + w22 * i2;
         }
@@ -650,9 +649,9 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
     }
     else if (out_ch == 4 && in_ch == 4)
     {
-      const float w00 = weight_ptr[0],  w10 = weight_ptr[1],  w20 = weight_ptr[2],  w30 = weight_ptr[3];
-      const float w01 = weight_ptr[4],  w11 = weight_ptr[5],  w21 = weight_ptr[6],  w31 = weight_ptr[7];
-      const float w02 = weight_ptr[8],  w12 = weight_ptr[9],  w22 = weight_ptr[10], w32 = weight_ptr[11];
+      const float w00 = weight_ptr[0], w10 = weight_ptr[1], w20 = weight_ptr[2], w30 = weight_ptr[3];
+      const float w01 = weight_ptr[4], w11 = weight_ptr[5], w21 = weight_ptr[6], w31 = weight_ptr[7];
+      const float w02 = weight_ptr[8], w12 = weight_ptr[9], w22 = weight_ptr[10], w32 = weight_ptr[11];
       const float w03 = weight_ptr[12], w13 = weight_ptr[13], w23 = weight_ptr[14], w33 = weight_ptr[15];
       for (int f = 0; f < num_frames; f++)
       {
@@ -661,7 +660,7 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         const float i1 = in_col[1];
         const float i2 = in_col[2];
         const float i3 = in_col[3];
-        output_ptr[f * 4]     = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
+        output_ptr[f * 4] = w00 * i0 + w01 * i1 + w02 * i2 + w03 * i3;
         output_ptr[f * 4 + 1] = w10 * i0 + w11 * i1 + w12 * i2 + w13 * i3;
         output_ptr[f * 4 + 2] = w20 * i0 + w21 * i1 + w22 * i2 + w23 * i3;
         output_ptr[f * 4 + 3] = w30 * i0 + w31 * i1 + w32 * i2 + w33 * i3;
@@ -677,8 +676,8 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         const float i3 = in_col[3], i4 = in_col[4], i5 = in_col[5];
         for (int o = 0; o < 6; o++)
         {
-          out_col[o] = weight_ptr[o] * i0 + weight_ptr[6 + o] * i1 + weight_ptr[12 + o] * i2
-                     + weight_ptr[18 + o] * i3 + weight_ptr[24 + o] * i4 + weight_ptr[30 + o] * i5;
+          out_col[o] = weight_ptr[o] * i0 + weight_ptr[6 + o] * i1 + weight_ptr[12 + o] * i2 + weight_ptr[18 + o] * i3
+                       + weight_ptr[24 + o] * i4 + weight_ptr[30 + o] * i5;
         }
       }
     }
@@ -693,7 +692,8 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         for (int o = 0; o < 8; o++)
         {
           out_col[o] = weight_ptr[o] * i0 + weight_ptr[8 + o] * i1 + weight_ptr[16 + o] * i2 + weight_ptr[24 + o] * i3
-                     + weight_ptr[32 + o] * i4 + weight_ptr[40 + o] * i5 + weight_ptr[48 + o] * i6 + weight_ptr[56 + o] * i7;
+                       + weight_ptr[32 + o] * i4 + weight_ptr[40 + o] * i5 + weight_ptr[48 + o] * i6
+                       + weight_ptr[56 + o] * i7;
         }
       }
     }
@@ -708,7 +708,8 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
         for (int o = 0; o < 4; o++)
         {
           out_col[o] = weight_ptr[o] * i0 + weight_ptr[4 + o] * i1 + weight_ptr[8 + o] * i2 + weight_ptr[12 + o] * i3
-                     + weight_ptr[16 + o] * i4 + weight_ptr[20 + o] * i5 + weight_ptr[24 + o] * i6 + weight_ptr[28 + o] * i7;
+                       + weight_ptr[16 + o] * i4 + weight_ptr[20 + o] * i5 + weight_ptr[24 + o] * i6
+                       + weight_ptr[28 + o] * i7;
         }
       }
     }
@@ -754,56 +755,56 @@ void nam::Conv1x1::process_(const Eigen::Ref<const Eigen::MatrixXf>& input, cons
 #ifdef NAM_USE_INLINE_GEMM
     if (!bias_fused)
     {
-    const int out_ch = (int)get_out_channels();
-    float* __restrict__ output_ptr = _output.data();
-    const float* __restrict__ bias_ptr = this->_bias.data();
+      const int out_ch = (int)get_out_channels();
+      float* __restrict__ output_ptr = _output.data();
+      const float* __restrict__ bias_ptr = this->_bias.data();
 
-    // Specialized paths for common small channel counts
-    if (out_ch == 2)
-    {
-      const float b0 = bias_ptr[0], b1 = bias_ptr[1];
-      for (int f = 0; f < num_frames; f++)
+      // Specialized paths for common small channel counts
+      if (out_ch == 2)
       {
-        const int off = f * 2;
-        output_ptr[off] += b0;
-        output_ptr[off + 1] += b1;
+        const float b0 = bias_ptr[0], b1 = bias_ptr[1];
+        for (int f = 0; f < num_frames; f++)
+        {
+          const int off = f * 2;
+          output_ptr[off] += b0;
+          output_ptr[off + 1] += b1;
+        }
       }
-    }
-    else if (out_ch == 3)
-    {
-      const float b0 = bias_ptr[0], b1 = bias_ptr[1], b2 = bias_ptr[2];
-      for (int f = 0; f < num_frames; f++)
+      else if (out_ch == 3)
       {
-        const int off = f * 3;
-        output_ptr[off] += b0;
-        output_ptr[off + 1] += b1;
-        output_ptr[off + 2] += b2;
+        const float b0 = bias_ptr[0], b1 = bias_ptr[1], b2 = bias_ptr[2];
+        for (int f = 0; f < num_frames; f++)
+        {
+          const int off = f * 3;
+          output_ptr[off] += b0;
+          output_ptr[off + 1] += b1;
+          output_ptr[off + 2] += b2;
+        }
       }
-    }
-    else if (out_ch == 4)
-    {
-      const float b0 = bias_ptr[0], b1 = bias_ptr[1];
-      const float b2 = bias_ptr[2], b3 = bias_ptr[3];
-      for (int f = 0; f < num_frames; f++)
+      else if (out_ch == 4)
       {
-        const int off = f * 4;
-        output_ptr[off] += b0;
-        output_ptr[off + 1] += b1;
-        output_ptr[off + 2] += b2;
-        output_ptr[off + 3] += b3;
+        const float b0 = bias_ptr[0], b1 = bias_ptr[1];
+        const float b2 = bias_ptr[2], b3 = bias_ptr[3];
+        for (int f = 0; f < num_frames; f++)
+        {
+          const int off = f * 4;
+          output_ptr[off] += b0;
+          output_ptr[off + 1] += b1;
+          output_ptr[off + 2] += b2;
+          output_ptr[off + 3] += b3;
+        }
       }
-    }
-    else
-    {
-      for (int f = 0; f < num_frames; f++)
+      else
       {
-        float* __restrict__ out_col = output_ptr + f * out_ch;
-        for (int o = 0; o < out_ch; o++)
+        for (int f = 0; f < num_frames; f++)
         {
-          out_col[o] += bias_ptr[o];
+          float* __restrict__ out_col = output_ptr + f * out_ch;
+          for (int o = 0; o < out_ch; o++)
+          {
+            out_col[o] += bias_ptr[o];
+          }
         }
       }
-    }
     } // !bias_fused
 #else
     _output.leftCols(num_frames).colwise() += this->_bias;
diff --git a/NAM/dsp.h b/NAM/dsp.h
index c20a5163..1fadcf70 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -27,8 +27,8 @@
 /// \brief Use a sample rate of -1 if we don't know what the model expects to be run at
 #define NAM_UNKNOWN_EXPECTED_SAMPLE_RATE -1.0
 
-#if defined(_MSC_VER) && !defined(__llvm__) 
-#define __restrict__ __restrict
+#if defined(_MSC_VER) && !defined(__llvm__)
+  #define __restrict__ __restrict
 #endif
 
 namespace nam
diff --git a/NAM/film.h b/NAM/film.h
index 27685269..d41911d4 100644
--- a/NAM/film.h
+++ b/NAM/film.h
@@ -123,7 +123,7 @@ class FiLM
           int i = 0;
           for (; i + 3 < input_dim; i += 4)
           {
-            out_col[i]     = in_col[i]     * scale_col[i]     + shift_col[i];
+            out_col[i] = in_col[i] * scale_col[i] + shift_col[i];
             out_col[i + 1] = in_col[i + 1] * scale_col[i + 1] + shift_col[i + 1];
             out_col[i + 2] = in_col[i + 2] * scale_col[i + 2] + shift_col[i + 2];
             out_col[i + 3] = in_col[i + 3] * scale_col[i + 3] + shift_col[i + 3];
@@ -161,7 +161,7 @@ class FiLM
           int i = 0;
           for (; i + 3 < input_dim; i += 4)
           {
-            out_col[i]     = in_col[i]     * scale_col[i];
+            out_col[i] = in_col[i] * scale_col[i];
             out_col[i + 1] = in_col[i + 1] * scale_col[i + 1];
             out_col[i + 2] = in_col[i + 2] * scale_col[i + 2];
             out_col[i + 3] = in_col[i + 3] * scale_col[i + 3];
diff --git a/NAM/gating_activations.h b/NAM/gating_activations.h
index 0d52298c..ff21c3b5 100644
--- a/NAM/gating_activations.h
+++ b/NAM/gating_activations.h
@@ -72,7 +72,7 @@ class GatingActivation
     const int input_stride = (int)input.outerStride();
     const float* __restrict__ input_ptr = input.derived().data();
     float* __restrict__ output_ptr = output.derived().data();
-    const int output_stride = (int)output.outerStride();  // Column stride for output
+    const int output_stride = (int)output.outerStride(); // Column stride for output
 
     for (int f = 0; f < num_samples; f++)
     {
@@ -178,7 +178,7 @@ class BlendingActivation
     const int input_stride = (int)input.outerStride();
     const float* __restrict__ input_ptr = input.derived().data();
     float* __restrict__ output_ptr = output.derived().data();
-    const int output_stride = (int)output.outerStride();  // Column stride for output
+    const int output_stride = (int)output.outerStride(); // Column stride for output
 
     for (int f = 0; f < num_samples; f++)
     {
diff --git a/NAM/lstm.cpp b/NAM/lstm.cpp
index 2828d50b..9169a7ee 100644
--- a/NAM/lstm.cpp
+++ b/NAM/lstm.cpp
@@ -179,8 +179,8 @@ nam::lstm::LSTMConfig nam::lstm::parse_config_json(const nlohmann::json& config)
 // LSTMConfig::create()
 std::unique_ptr<nam::DSP> nam::lstm::LSTMConfig::create(std::vector<float> weights, double sampleRate)
 {
-  return std::make_unique<nam::lstm::LSTM>(in_channels, out_channels, num_layers, input_size, hidden_size, weights,
-                                           sampleRate);
+  return std::make_unique<nam::lstm::LSTM>(
+    in_channels, out_channels, num_layers, input_size, hidden_size, weights, sampleRate);
 }
 
 // Config parser for ConfigParserRegistry
diff --git a/tools/render.cpp b/tools/render.cpp
index c3cabb2a..4f50fa97 100644
--- a/tools/render.cpp
+++ b/tools/render.cpp
@@ -38,7 +38,7 @@ bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSample
   const uint32_t fmtSize = 16;
   out.write("fmt ", 4);
   out.write(reinterpret_cast<const char*>(&fmtSize), 4);
-  const uint16_t audioFormat = 3;  // IEEE float
+  const uint16_t audioFormat = 3; // IEEE float
   out.write(reinterpret_cast<const char*>(&audioFormat), 2);
   const uint16_t numChannels = 1;
   out.write(reinterpret_cast<const char*>(&numChannels), 2);
@@ -59,7 +59,7 @@ bool SaveWavFloat32(const char* fileName, const float* samples, size_t numSample
   return out.good();
 }
 
-}  // namespace
+} // namespace
 
 int main(int argc, char* argv[])
 {
@@ -138,8 +138,8 @@ int main(int argc, char* argv[])
   const double expectedRate = model->GetExpectedSampleRate();
   if (expectedRate > 0 && std::abs(inputSampleRate - expectedRate) > 0.5)
   {
-    std::cerr << "Error: Input WAV sample rate (" << inputSampleRate
-              << " Hz) does not match model expected rate (" << expectedRate << " Hz)\n";
+    std::cerr << "Error: Input WAV sample rate (" << inputSampleRate << " Hz) does not match model expected rate ("
+              << expectedRate << " Hz)\n";
     return 1;
   }
 
diff --git a/tools/test/test_container.cpp b/tools/test/test_container.cpp
index 9030e28a..993c9065 100644
--- a/tools/test/test_container.cpp
+++ b/tools/test/test_container.cpp
@@ -38,8 +38,9 @@ nlohmann::json build_container_json(const std::string& small_path, const std::st
   nlohmann::json container;
   container["version"] = "0.7.0";
   container["architecture"] = "SlimmableContainer";
-  container["config"]["submodels"] = nlohmann::json::array(
-    {{{"max_value", 0.33}, {"model", small_model}}, {{"max_value", 0.66}, {"model", medium_model}}, {{"max_value", 1.0}, {"model", large_model}}});
+  container["config"]["submodels"] = nlohmann::json::array({{{"max_value", 0.33}, {"model", small_model}},
+                                                            {{"max_value", 0.66}, {"model", medium_model}},
+                                                            {{"max_value", 1.0}, {"model", large_model}}});
   container["weights"] = nlohmann::json::array();
   container["sample_rate"] = 48000;
   return container;
@@ -74,21 +75,24 @@ void process_and_verify(nam::DSP* dsp, int num_buffers, int buffer_size)
 
 void test_container_loads_from_json()
 {
-  auto j = build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
+  auto j =
+    build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
   auto dsp = nam::get_dsp(j);
   assert(dsp != nullptr);
 }
 
 void test_container_processes_audio()
 {
-  auto j = build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
+  auto j =
+    build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
   auto dsp = nam::get_dsp(j);
   process_and_verify(dsp.get(), 3, 64);
 }
 
 void test_container_slimmable_selects_submodel()
 {
-  auto j = build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
+  auto j =
+    build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
   auto dsp = nam::get_dsp(j);
   const double sample_rate = 48000.0;
   const int buffer_size = 64;
@@ -129,7 +133,8 @@ void test_container_slimmable_selects_submodel()
 
 void test_container_boundary_values()
 {
-  auto j = build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
+  auto j =
+    build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
   auto dsp = nam::get_dsp(j);
   const double sample_rate = 48000.0;
   const int buffer_size = 16;
@@ -230,8 +235,8 @@ void test_container_unsorted_submodels_throws()
   nlohmann::json j;
   j["version"] = "0.7.0";
   j["architecture"] = "SlimmableContainer";
-  j["config"]["submodels"] = nlohmann::json::array(
-    {{{"max_value", 0.8}, {"model", small_json}}, {{"max_value", 0.5}, {"model", medium_json}}});
+  j["config"]["submodels"] =
+    nlohmann::json::array({{{"max_value", 0.8}, {"model", small_json}}, {{"max_value", 0.5}, {"model", medium_json}}});
   j["weights"] = nlohmann::json::array();
   j["sample_rate"] = 48000;
 
@@ -263,8 +268,8 @@ void test_container_sample_rate_mismatch_throws()
   nlohmann::json j;
   j["version"] = "0.7.0";
   j["architecture"] = "SlimmableContainer";
-  j["config"]["submodels"] = nlohmann::json::array(
-    {{{"max_value", 0.5}, {"model", model_44k}}, {{"max_value", 1.0}, {"model", model_48k}}});
+  j["config"]["submodels"] =
+    nlohmann::json::array({{{"max_value", 0.5}, {"model", model_44k}}, {{"max_value", 1.0}, {"model", model_48k}}});
   j["weights"] = nlohmann::json::array();
   j["sample_rate"] = 48000;
 
@@ -296,7 +301,8 @@ void test_container_load_from_file()
 
 void test_container_default_is_max_size()
 {
-  auto j = build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
+  auto j =
+    build_container_json("example_models/lstm.nam", "example_models/wavenet.nam", "example_models/wavenet_a2_max.nam");
   auto dsp = nam::get_dsp(j);
   const double sample_rate = 48000.0;
   const int buffer_size = 64;
diff --git a/tools/test/test_noncontiguous_blocks.cpp b/tools/test/test_noncontiguous_blocks.cpp
index 7044fd10..e72f342e 100644
--- a/tools/test/test_noncontiguous_blocks.cpp
+++ b/tools/test/test_noncontiguous_blocks.cpp
@@ -136,10 +136,10 @@ void test_conv1x1_process_toprows_2x2()
   conv.SetMaxBufferSize(64);
 
   Eigen::MatrixXf full_matrix(total_rows, num_frames);
-  full_matrix << 1.0f, 2.0f, 3.0f,   // row 0 (top, used)
-                 4.0f, 5.0f, 6.0f,   // row 1 (top, used)
-                 99.0f, 99.0f, 99.0f, // row 2 (bottom, NOT used)
-                 99.0f, 99.0f, 99.0f; // row 3 (bottom, NOT used)
+  full_matrix << 1.0f, 2.0f, 3.0f, // row 0 (top, used)
+    4.0f, 5.0f, 6.0f, // row 1 (top, used)
+    99.0f, 99.0f, 99.0f, // row 2 (bottom, NOT used)
+    99.0f, 99.0f, 99.0f; // row 3 (bottom, NOT used)
 
   conv.process_(full_matrix.topRows(bottleneck), num_frames);
   const auto& output = conv.GetOutput();
@@ -236,12 +236,12 @@ void test_film_process_toprows_with_shift()
   // Configure Conv1x1 with zero weights, fixed biases for scale/shift
   std::vector<float> weights((2 * input_dim) * condition_dim + (2 * input_dim), 0.0f);
   const int bias_offset = (2 * input_dim) * condition_dim;
-  weights[bias_offset + 0] = 2.0f;  // scale[0]
+  weights[bias_offset + 0] = 2.0f; // scale[0]
   weights[bias_offset + 1] = -1.0f; // scale[1]
-  weights[bias_offset + 2] = 0.5f;  // scale[2]
+  weights[bias_offset + 2] = 0.5f; // scale[2]
   weights[bias_offset + 3] = 10.0f; // shift[0]
   weights[bias_offset + 4] = -5.0f; // shift[1]
-  weights[bias_offset + 5] = 3.0f;  // shift[2]
+  weights[bias_offset + 5] = 3.0f; // shift[2]
   auto it = weights.begin();
   film.set_weights_(it);
 
@@ -447,7 +447,7 @@ void test_gating_output_toprows()
   {
     for (int c = 0; c < bottleneck; c++)
     {
-      const float input_val = input(c, f);               // identity activation
+      const float input_val = input(c, f); // identity activation
       const float gate_val = 1.0f / (1.0f + expf(-input(c + bottleneck, f))); // sigmoid
       const float expected = input_val * gate_val;
       assert(std::abs(output_matrix(c, f) - expected) < 1e-5f);