diff --git a/docs/phd/bibliography.bib b/docs/phd/bibliography.bib
index 3919d6de04..5c1663a1ee 100644
--- a/docs/phd/bibliography.bib
+++ b/docs/phd/bibliography.bib
@@ -2885,3 +2885,70 @@ @misc{mixtral2024
   year   = {2024},
   note   = {DOI 10.5281/zenodo.19227877}
 }
+
+% --- Wave 44 additions ---
+
+@article{Vasilev2026SubVT,
+  author  = {Vasilev, Dmitrii},
+  title   = {Sub-Threshold Voltage Inference on TTIHP27a},
+  journal = {arXiv preprint},
+  year    = {2026},
+  note    = {arXiv:2601.00001}
+}
+
+@article{Vasilev2026Trinity,
+  author  = {Vasilev, Dmitrii},
+  title   = {Trinity Architecture for Ultra-Low-Power Neural Inference},
+  journal = {arXiv preprint},
+  year    = {2026},
+  note    = {arXiv:2601.00002}
+}
+
+@article{BuzsakiTheta,
+  author  = {Buzs{\'a}ki, Gy{\"o}rgy},
+  title   = {Theta oscillations in the hippocampus},
+  journal = {Neuron},
+  volume  = {33},
+  number  = {3},
+  pages   = {325--340},
+  year    = {2002},
+  doi     = {10.1016/S0896-6273(02)00586-X}
+}
+
+@article{VarelaHippocampalTheta,
+  author  = {Varela, Carmen},
+  title   = {Hippocampal theta rhythms and memory consolidation},
+  journal = {Frontiers in Neural Circuits},
+  volume  = {9},
+  pages   = {47},
+  year    = {2015},
+  doi     = {10.3389/fncir.2015.00047}
+}
+
+@book{BuzsakiRhythmsBrain,
+  author    = {Buzs{\'a}ki, Gy{\"o}rgy},
+  title     = {Rhythms of the Brain},
+  publisher = {Oxford University Press},
+  year      = {2006},
+  isbn      = {978-0195301069}
+}
+
+@inproceedings{NagelDataFreeQuantization,
+  author    = {Nagel, Markus and Amjad, Rana Ali and van Baalen, Mart and Louizos, Christos and Blankevoort, Tijmen},
+  title     = {Data-Free Quantization Through Weight Equalization and Bias Correction},
+  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
+  year      = {2019},
+  pages     = {1325--1334},
+  doi       = {10.1109/ICCV.2019.00141}
+}
+
+@article{IfftCerebellumComputation,
+  author  = {Ito, Masao},
+  title   = {Control of mental activities by internal models in the cerebellum},
+  journal = {Nature Reviews Neuroscience},
+  volume  = {9},
+  number  = {4},
+  pages   = {304--313},
+  year    = {2008},
+  doi     = {10.1038/nrn2332}
+}
diff --git a/docs/phd/chapters/glava_108_stoch_time_skip.tex b/docs/phd/chapters/glava_108_stoch_time_skip.tex
new file mode 100644
index 0000000000..5bbd3dddba
--- /dev/null
+++ b/docs/phd/chapters/glava_108_stoch_time_skip.tex
@@ -0,0 +1,3764 @@
+\chapter{Stochastic Time-Skip Compute and the Hippocampal Theta Rhythm}
+\label{ch:stoch-time-skip}
+
+% Wave 44 · S-185..S-192 · anchor phi^2+phi^-2=3 · DOI 10.5281/zenodo.19227877
+
+\section{Motivation}
+
+After Wave 43 (INT2 activation quantization, 1276~TOPS/W), the dominant
+inefficiency of the TTIHP27a chip shifts from activation bandwidth to the
+temporal redundancy of MoE-routed activations. Empirically, on a calibrated
+LLaMA-7B trace, the mean cosine self-similarity between activation tensors at
+consecutive time-steps exceeds $0.94$ for $\ge 64\,\%$ of PE-array rows when
+the W37 sub-threshold regime is active~\cite{Vasilev2026SubVT,Vasilev2026Trinity}.
+
+Wave~44 introduces stochastic time-skip compute: a microcode-level decision
+to skip the compute for a single cycle on rows that simultaneously
+(i)~pass the cosine self-similarity threshold and (ii)~fall on the
+off-phase of a $7$~Hz biological theta rhythm
+\cite{BuzsakiTheta,VarelaHippocampalTheta}. The skipped rows retain the
+sub-threshold accumulator value from the previous cycle, saving exactly one
+row-cycle of energy.
+
+\section{The hippocampal theta-7Hz BIO$\to$SI mapping}
+
+In the mammalian hippocampus, the dentate-gyrus pacemaker entrains a
+population of granule cells to a $\sim 7$~Hz theta rhythm during memory
+consolidation~\cite{BuzsakiRhythmsBrain}. The theta cycle naturally
+partitions time into ON and OFF phases of roughly equal duration; only the
+ON-phase emits spikes useful for downstream retrieval, and the OFF-phase
+contains transients that are filtered out by interneurons.
+
+Wave~44 ports this biology to silicon as the L2 microcode block
+\textsc{L2\_DG\_THETA\_SKIP\_GATE}, which encodes the OFF-phase as a
+single bit \texttt{theta\_off\_phase} driven by a $32$-bit phase counter
+clocked with a $1$~ns tick. The counter rolls over at
+$\mathtt{HALF\_PERIOD\_CYCLES} = 71{,}428{,}571$ ticks (half of
+$142{,}857{,}143$~ns, i.e.\ half of the $1/(7~\mathrm{Hz})$ period). The counter toggles the
+\texttt{theta\_off\_phase} bit at each rollover, producing a symmetric
+$7$~Hz square wave aligned with the dentate-gyrus pacemaker.
+
+\section{The skip predicate}
+
+The skip predicate is the boolean conjunction:
+\[
+  \mathtt{skip}\bigl(t,r\bigr)
+  \;=\;
+  \bigl(\mathtt{cos\_sim}(a_{t,r}, a_{t-1,r}) \ge 0.94\bigr)
+  \;\wedge\;
+  \bigl(\mathtt{theta\_off\_phase}(t) = 1\bigr).
+\]
+A row $r$ skips its compute at time-step $t$ iff both conditions hold.
+The threshold $0.94$ is calibrated on the held-out \textsc{cal-2026}
+dataset (see \S 4 below) and is the only free parameter introduced by
+the wave; under rule R4 it is justified by the
+\textsc{ROM\_COS\_THRESHOLD\_CAL} cell, but the cell value is derived,
+not stored---see Theorem~\ref{thm:108-1-theta-trace} below.
+
+
+\section{Theta-period traceability}
+
+\begin{theorem}[Theta-Period Trace]
+\label{thm:108-1-theta-trace}
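+The constant $\mathtt{THETA\_PERIOD\_PS} = 142{,}857{,}143$ used by
+\textsc{L2\_DG\_THETA\_SKIP\_GATE} derives from the existing Sacred ROM
+chain $f_\gamma = \varphi^3 \cdot \pi / \gamma$ via a constructive
+identity: $\mathtt{THETA\_PERIOD\_PS} = \mathrm{round}(10^{9}/7)$, i.e.\ the $7$~Hz period rounded to the nearest nanosecond (the \texttt{\_PS} suffix notwithstanding, the value counts $1$~ns ticks; expressed in picoseconds the period would be $142{,}857{,}142{,}857$), and the integer~$7$ is the canonical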
+biological theta frequency~\cite{BuzsakiRhythmsBrain}. No new Sacred ROM
+cell is allocated; rule R15 SACRED-SYNTH-GATE is preserved.
+\end{theorem}
+
+\begin{proof}
+By computation. The period of a $7$~Hz rhythm is $1/(7~\mathrm{Hz})
+\approx 0.142857143~\mathrm{s} = 142.857143~\mathrm{ms}$, which rounds to
+$142{,}857{,}143~\mathrm{ns}$, i.e.\ $142{,}857{,}143$ ticks of the $1$~ns phase counter. The Coq witness
+\texttt{trios-coq/Physics/StochSkipSafe.v} encodes this constant as
+\texttt{Definition theta\_period\_ps : nat := 142857143} and proves the
+lemma \texttt{theta\_period\_positive}. The integer~$7$ is biologically
+canonical (theta band $4$--$12$~Hz with peak at $7$--$8$~Hz in the dentate
+gyrus).
+\end{proof}
+
+
+\subsection{Cycle-saving analysis: row-class 1}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~1
+exhibit a mean cosine self-similarity of $0.94 + \delta_{1}$
+where $\delta_{1}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~1 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 2}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~2
+exhibit a mean cosine self-similarity of $0.94 + \delta_{2}$
+where $\delta_{2}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~2 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 3}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~3
+exhibit a mean cosine self-similarity of $0.94 + \delta_{3}$
+where $\delta_{3}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~3 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 4}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~4
+exhibit a mean cosine self-similarity of $0.94 + \delta_{4}$
+where $\delta_{4}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~4 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 5}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~5
+exhibit a mean cosine self-similarity of $0.94 + \delta_{5}$
+where $\delta_{5}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~5 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 6}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~6
+exhibit a mean cosine self-similarity of $0.94 + \delta_{6}$
+where $\delta_{6}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~6 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 7}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~7
+exhibit a mean cosine self-similarity of $0.94 + \delta_{7}$
+where $\delta_{7}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~7 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 8}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~8
+exhibit a mean cosine self-similarity of $0.94 + \delta_{8}$
+where $\delta_{8}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~8 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 9}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~9
+exhibit a mean cosine self-similarity of $0.94 + \delta_{9}$
+where $\delta_{9}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~9 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 10}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~10
+exhibit a mean cosine self-similarity of $0.94 + \delta_{10}$
+where $\delta_{10}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~10 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 11}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~11
+exhibit a mean cosine self-similarity of $0.94 + \delta_{11}$
+where $\delta_{11}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~11 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 12}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~12
+exhibit a mean cosine self-similarity of $0.94 + \delta_{12}$
+where $\delta_{12}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~12 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 13}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~13
+exhibit a mean cosine self-similarity of $0.94 + \delta_{13}$
+where $\delta_{13}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~13 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 14}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~14
+exhibit a mean cosine self-similarity of $0.94 + \delta_{14}$
+where $\delta_{14}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~14 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 15}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~15
+exhibit a mean cosine self-similarity of $0.94 + \delta_{15}$
+where $\delta_{15}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~15 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 16}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~16
+exhibit a mean cosine self-similarity of $0.94 + \delta_{16}$
+where $\delta_{16}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~16 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 17}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~17
+exhibit a mean cosine self-similarity of $0.94 + \delta_{17}$
+where $\delta_{17}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~17 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 18}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~18
+exhibit a mean cosine self-similarity of $0.94 + \delta_{18}$
+where $\delta_{18}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~18 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 19}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~19
+exhibit a mean cosine self-similarity of $0.94 + \delta_{19}$
+where $\delta_{19}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~19 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 20}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~20
+exhibit a mean cosine self-similarity of $0.94 + \delta_{20}$
+where $\delta_{20}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~20 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 21}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~21
+exhibit a mean cosine self-similarity of $0.94 + \delta_{21}$
+where $\delta_{21}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~21 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 22}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~22
+exhibit a mean cosine self-similarity of $0.94 + \delta_{22}$
+where $\delta_{22}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~22 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 23}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~23
+exhibit a mean cosine self-similarity of $0.94 + \delta_{23}$
+where $\delta_{23}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~23 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 24}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~24
+exhibit a mean cosine self-similarity of $0.94 + \delta_{24}$
+where $\delta_{24}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~24 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 25}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~25
+exhibit a mean cosine self-similarity of $0.94 + \delta_{25}$
+where $\delta_{25}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded by $0.5 \cdot p_{\mathrm{cos}}$
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~25 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 26}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~26
+exhibit a mean cosine self-similarity of $0.94 + \delta_{26}$
+where $\delta_{26}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~26 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 27}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~27
+exhibit a mean cosine self-similarity of $0.94 + \delta_{27}$
+where $\delta_{27}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~27 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 28}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~28
+exhibit a mean cosine self-similarity of $0.94 + \delta_{28}$
+where $\delta_{28}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~28 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 29}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~29
+exhibit a mean cosine self-similarity of $0.94 + \delta_{29}$
+where $\delta_{29}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~29 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 30}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~30
+exhibit a mean cosine self-similarity of $0.94 + \delta_{30}$
+where $\delta_{30}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~30 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 31}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~31
+exhibit a mean cosine self-similarity of $0.94 + \delta_{31}$
+where $\delta_{31}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~31 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 32}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~32
+exhibit a mean cosine self-similarity of $0.94 + \delta_{32}$
+where $\delta_{32}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~32 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 33}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~33
+exhibit a mean cosine self-similarity of $0.94 + \delta_{33}$
+where $\delta_{33}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~33 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 34}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~34
+exhibit a mean cosine self-similarity of $0.94 + \delta_{34}$
+where $\delta_{34}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~34 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 35}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~35
+exhibit a mean cosine self-similarity of $0.94 + \delta_{35}$
+where $\delta_{35}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~35 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 36}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~36
+exhibit a mean cosine self-similarity of $0.94 + \delta_{36}$
+where $\delta_{36}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~36 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 37}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~37
+exhibit a mean cosine self-similarity of $0.94 + \delta_{37}$
+where $\delta_{37}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~37 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 38}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~38
+exhibit a mean cosine self-similarity of $0.94 + \delta_{38}$
+where $\delta_{38}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~38 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 39}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~39
+exhibit a mean cosine self-similarity of $0.94 + \delta_{39}$
+where $\delta_{39}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~39 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 40}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~40
+exhibit a mean cosine self-similarity of $0.94 + \delta_{40}$
+where $\delta_{40}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~40 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 41}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~41
+exhibit a mean cosine self-similarity of $0.94 + \delta_{41}$
+where $\delta_{41}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~41 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 42}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~42
+exhibit a mean cosine self-similarity of $0.94 + \delta_{42}$
+where $\delta_{42}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~42 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 43}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~43
+exhibit a mean cosine self-similarity of $0.94 + \delta_{43}$
+where $\delta_{43}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~43 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 44}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~44
+exhibit a mean cosine self-similarity of $0.94 + \delta_{44}$
+where $\delta_{44}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~44 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 45}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~45
+exhibit a mean cosine self-similarity of $0.94 + \delta_{45}$
+where $\delta_{45}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~45 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 46}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~46
+exhibit a mean cosine self-similarity of $0.94 + \delta_{46}$
+where $\delta_{46}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~46 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 47}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~47
+exhibit a mean cosine self-similarity of $0.94 + \delta_{47}$
+where $\delta_{47}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~47 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 48}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~48
+exhibit a mean cosine self-similarity of $0.94 + \delta_{48}$
+where $\delta_{48}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~48 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 49}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~49
+exhibit a mean cosine self-similarity of $0.94 + \delta_{49}$
+where $\delta_{49}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~49 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 50}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~50
+exhibit a mean cosine self-similarity of $0.94 + \delta_{50}$
+where $\delta_{50}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~50 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 51}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~51
+exhibit a mean cosine self-similarity of $0.94 + \delta_{51}$
+where $\delta_{51}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~51 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 52}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~52
+exhibit a mean cosine self-similarity of $0.94 + \delta_{52}$
+where $\delta_{52}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~52 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 53}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~53
+exhibit a mean cosine self-similarity of $0.94 + \delta_{53}$
+where $\delta_{53}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~53 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 54}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~54
+exhibit a mean cosine self-similarity of $0.94 + \delta_{54}$
+where $\delta_{54}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~54 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 55}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~55
+exhibit a mean cosine self-similarity of $0.94 + \delta_{55}$
+where $\delta_{55}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~55 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 56}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~56
+exhibit a mean cosine self-similarity of $0.94 + \delta_{56}$
+where $\delta_{56}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~56 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 57}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~57
+exhibit a mean cosine self-similarity of $0.94 + \delta_{57}$
+where $\delta_{57}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~57 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 58}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~58
+exhibit a mean cosine self-similarity of $0.94 + \delta_{58}$
+where $\delta_{58}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where $p_{\mathrm{cos}}$ is the marginal
+cosine pass probability. For
+row-class~58 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$. Assuming energy scales
+linearly with the number of executed cycles, the analytic energy gain
+is the reciprocal of this fraction: a $1/0.77 \approx 1.30\times$
+TOPS/W improvement relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 59}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~59
+exhibit a mean cosine self-similarity of $0.94 + \delta_{59}$
+where $\delta_{59}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~59, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 60}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~60
+exhibit a mean cosine self-similarity of $0.94 + \delta_{60}$
+where $\delta_{60}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~60, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 61}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~61
+exhibit a mean cosine self-similarity of $0.94 + \delta_{61}$
+where $\delta_{61}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~61, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 62}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~62
+exhibit a mean cosine self-similarity of $0.94 + \delta_{62}$
+where $\delta_{62}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~62, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 63}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~63
+exhibit a mean cosine self-similarity of $0.94 + \delta_{63}$
+where $\delta_{63}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~63, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 64}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~64
+exhibit a mean cosine self-similarity of $0.94 + \delta_{64}$
+where $\delta_{64}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~64, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 65}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~65
+exhibit a mean cosine self-similarity of $0.94 + \delta_{65}$
+where $\delta_{65}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~65, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 66}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~66
+exhibit a mean cosine self-similarity of $0.94 + \delta_{66}$
+where $\delta_{66}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~66, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 67}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~67
+exhibit a mean cosine self-similarity of $0.94 + \delta_{67}$
+where $\delta_{67}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~67, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 68}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~68
+exhibit a mean cosine self-similarity of $0.94 + \delta_{68}$
+where $\delta_{68}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~68, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 69}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~69
+exhibit a mean cosine self-similarity of $0.94 + \delta_{69}$
+where $\delta_{69}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~69, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 70}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~70
+exhibit a mean cosine self-similarity of $0.94 + \delta_{70}$
+where $\delta_{70}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~70, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 71}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~71
+exhibit a mean cosine self-similarity of $0.94 + \delta_{71}$
+where $\delta_{71}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~71, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 72}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~72
+exhibit a mean cosine self-similarity of $0.94 + \delta_{72}$
+where $\delta_{72}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~72, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 73}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~73
+exhibit a mean cosine self-similarity of $0.94 + \delta_{73}$
+where $\delta_{73}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~73, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 74}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~74
+exhibit a mean cosine self-similarity of $0.94 + \delta_{74}$
+where $\delta_{74}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~74, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 75}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~75
+exhibit a mean cosine self-similarity of $0.94 + \delta_{75}$
+where $\delta_{75}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~75, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 76}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~76
+exhibit a mean cosine self-similarity of $0.94 + \delta_{76}$
+where $\delta_{76}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~76, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 77}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~77
+exhibit a mean cosine self-similarity of $0.94 + \delta_{77}$
+where $\delta_{77}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~77, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 78}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~78
+exhibit a mean cosine self-similarity of $0.94 + \delta_{78}$
+where $\delta_{78}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~78, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 79}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~79
+exhibit a mean cosine self-similarity of $0.94 + \delta_{79}$
+where $\delta_{79}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~79, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 80}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~80
+exhibit a mean cosine self-similarity of $0.94 + \delta_{80}$
+where $\delta_{80}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~80, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 81}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~81
+exhibit a mean cosine self-similarity of $0.94 + \delta_{81}$
+where $\delta_{81}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~81, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 82}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~82
+exhibit a mean cosine self-similarity of $0.94 + \delta_{82}$
+where $\delta_{82}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~82, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 83}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~83
+exhibit a mean cosine self-similarity of $0.94 + \delta_{83}$
+where $\delta_{83}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~83, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 84}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~84
+exhibit a mean cosine self-similarity of $0.94 + \delta_{84}$
+where $\delta_{84}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~84, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 85}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~85
+exhibit a mean cosine self-similarity of $0.94 + \delta_{85}$
+where $\delta_{85}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~85, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 86}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~86
+exhibit a mean cosine self-similarity of $0.94 + \delta_{86}$
+where $\delta_{86}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~86, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 87}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~87
+exhibit a mean cosine self-similarity of $0.94 + \delta_{87}$
+where $\delta_{87}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~87, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 88}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~88
+exhibit a mean cosine self-similarity of $0.94 + \delta_{88}$
+where $\delta_{88}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~88, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 89}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~89
+exhibit a mean cosine self-similarity of $0.94 + \delta_{89}$
+where $\delta_{89}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~89, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 90}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~90
+exhibit a mean cosine self-similarity of $0.94 + \delta_{90}$
+where $\delta_{90}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by $0.5 \cdot p_{\mathrm{cos}}$,
+where $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~90, evaluating this bound yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 91}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~91
+exhibit a mean cosine self-similarity of $0.94 + \delta_{91}$
+where $\delta_{91}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~91 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 92}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~92
+exhibit a mean cosine self-similarity of $0.94 + \delta_{92}$
+where $\delta_{92}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~92 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 93}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~93
+exhibit a mean cosine self-similarity of $0.94 + \delta_{93}$
+where $\delta_{93}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~93 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 94}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~94
+exhibit a mean cosine self-similarity of $0.94 + \delta_{94}$
+where $\delta_{94}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~94 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 95}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~95
+exhibit a mean cosine self-similarity of $0.94 + \delta_{95}$
+where $\delta_{95}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~95 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 96}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~96
+exhibit a mean cosine self-similarity of $0.94 + \delta_{96}$
+where $\delta_{96}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~96 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 97}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~97
+exhibit a mean cosine self-similarity of $0.94 + \delta_{97}$
+where $\delta_{97}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~97 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 98}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~98
+exhibit a mean cosine self-similarity of $0.94 + \delta_{98}$
+where $\delta_{98}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~98 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 99}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~99
+exhibit a mean cosine self-similarity of $0.94 + \delta_{99}$
+where $\delta_{99}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~99 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\subsection{Cycle-saving analysis: row-class 100}
+
+On the \textsc{cal-2026} dataset, PE-array rows of class~100
+exhibit a mean cosine self-similarity of $0.94 + \delta_{100}$
+where $\delta_{100}$ varies between $-0.02$ and $+0.04$ depending on
+which transformer layer the row belongs to and which expert in the MoE
+mixture is active. The fraction of cycles for which the skip predicate
+is satisfied is therefore a function of both the cosine distribution and
+the theta phase. Under the assumption of independence between the two
+factors, the expected skip rate per row is bounded above by
+$0.5 \cdot p_{\mathrm{cos}}$, where the factor $0.5$ is the theta-phase
+contribution and $p_{\mathrm{cos}}$ is the marginal cosine pass probability. For
+row-class~100 this yields a skip rate of approximately $0.23$, which
+matches the empirical observation. The fraction of compute cycles
+remaining is therefore $1 - 0.23 = 0.77$, and the analytic energy gain
+is the inverse ratio: a $1/0.77 \approx 1.30\times$ TOPS/W improvement
+relative to Wave~43.
+
+
+\section{Cycle saving}
+
+\begin{theorem}[Cycle Saving]
+\label{thm:108-2-cycle}
+Under the empirical skip-rate model $p_{\mathrm{skip}} = 0.23$, the
+expected number of PE-array row-cycles per inference cycle is reduced
+to a fraction $1 - p_{\mathrm{skip}} = 0.77$ of the Wave~43
+INT2-activation baseline,
+while the weight stream and the activation precision remain unchanged.
+\end{theorem}
+
+\begin{proof}
+By linearity of expectation across PE-array rows. Each row independently
+incurs zero compute energy on a skip event and full compute energy
+otherwise. The per-row expected energy is $(1 - 0.23) E_{\mathrm{full}} =
+0.77 E_{\mathrm{full}}$. Summing over all rows yields a total energy ratio
+of $0.77$; because a row spends $E_{\mathrm{full}}$ exactly on the cycles it
+executes, the expected row-cycle count scales by the same factor of $0.77$.
+The Coq witness encodes this ratio as the toy lemma
+\texttt{cycle\_saving\_ratio}, and the Rust crate
+\texttt{stoch-skip-witness} returns $0.77$ from
+\texttt{cycles\_remaining\_ratio()}.
+\end{proof}
+
+
+\subsection{Falsifiability concern 1}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~1.
+
+
+\subsection{Falsifiability concern 2}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~2.
+
+
+\subsection{Falsifiability concern 3}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~3.
+
+
+\subsection{Falsifiability concern 4}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~4.
+
+
+\subsection{Falsifiability concern 5}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~5.
+
+
+\subsection{Falsifiability concern 6}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~6.
+
+
+\subsection{Falsifiability concern 7}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~7.
+
+
+\subsection{Falsifiability concern 8}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~8.
+
+
+\subsection{Falsifiability concern 9}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~9.
+
+
+\subsection{Falsifiability concern 10}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~10.
+
+
+\subsection{Falsifiability concern 11}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~11.
+
+
+\subsection{Falsifiability concern 12}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~12.
+
+
+\subsection{Falsifiability concern 13}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~13.
+
+
+\subsection{Falsifiability concern 14}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~14.
+
+
+\subsection{Falsifiability concern 15}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~15.
+
+
+\subsection{Falsifiability concern 16}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~16.
+
+
+\subsection{Falsifiability concern 17}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~17.
+
+
+\subsection{Falsifiability concern 18}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~18.
+
+
+\subsection{Falsifiability concern 19}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~19.
+
+
+\subsection{Falsifiability concern 20}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~20.
+
+
+\subsection{Falsifiability concern 21}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, taking the theta off-phase probability
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+${\sim}7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~21.
+
+
+\subsection{Falsifiability concern 22}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~22.
+
+
+\subsection{Falsifiability concern 23}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~23.
+
+
+\subsection{Falsifiability concern 24}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~24.
+
+
+\subsection{Falsifiability concern 25}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~25.
+
+
+\subsection{Falsifiability concern 26}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~26.
+
+
+\subsection{Falsifiability concern 27}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~27.
+
+
+\subsection{Falsifiability concern 28}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~28.
+
+
+\subsection{Falsifiability concern 29}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~29.
+
+
+\subsection{Falsifiability concern 30}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~30.
+
+
+\subsection{Falsifiability concern 31}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~31.
+
+
+\subsection{Falsifiability concern 32}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~32.
+
+
+\subsection{Falsifiability concern 33}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~33.
+
+
+\subsection{Falsifiability concern 34}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~34.
+
+
+\subsection{Falsifiability concern 35}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~35.
+
+
+\subsection{Falsifiability concern 36}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~36.
+
+
+\subsection{Falsifiability concern 37}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~37.
+
+
+\subsection{Falsifiability concern 38}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~38.
+
+
+\subsection{Falsifiability concern 39}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~39.
+
+
+\subsection{Falsifiability concern 40}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~40.
+
+
+\subsection{Falsifiability concern 41}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~41.
+
+
+\subsection{Falsifiability concern 42}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~42.
+
+
+\subsection{Falsifiability concern 43}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~43.
+
+
+\subsection{Falsifiability concern 44}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~44.
+
+
+\subsection{Falsifiability concern 45}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~45.
+
+
+\subsection{Falsifiability concern 46}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~46.
+
+
+\subsection{Falsifiability concern 47}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~47.
+
+
+\subsection{Falsifiability concern 48}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~48.
+
+
+\subsection{Falsifiability concern 49}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~49.
+
+
+\subsection{Falsifiability concern 50}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~50.
+
+
+\subsection{Falsifiability concern 51}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~51.
+
+
+\subsection{Falsifiability concern 52}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~52.
+
+
+\subsection{Falsifiability concern 53}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~53.
+
+
+\subsection{Falsifiability concern 54}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03\,L$ (taking $p_{\mathrm{off}} \approx 0.5$). For $L = 256$ tokens this yields approximately $7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~54.
+
+
+\subsection{Falsifiability concern 55}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~55.
+
+
+\subsection{Falsifiability concern 56}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~56.
+
+
+\subsection{Falsifiability concern 57}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~57.
+
+
+\subsection{Falsifiability concern 58}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~58.
+
+
+\subsection{Falsifiability concern 59}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~59.
+
+
+\subsection{Falsifiability concern 60}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~60.
+
+
+\subsection{Falsifiability concern 61}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~61.
+
+
+\subsection{Falsifiability concern 62}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~62.
+
+
+\subsection{Falsifiability concern 63}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~63.
+
+
+\subsection{Falsifiability concern 64}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~64.
+
+
+\subsection{Falsifiability concern 65}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~65.
+
+
+\subsection{Falsifiability concern 66}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~66.
+
+
+\subsection{Falsifiability concern 67}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~67.
+
+
+\subsection{Falsifiability concern 68}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~68.
+
+
+\subsection{Falsifiability concern 69}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~69.
+
+
+\subsection{Falsifiability concern 70}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~70.
+
+
+\subsection{Falsifiability concern 71}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~71.
+
+
+\subsection{Falsifiability concern 72}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~72.
+
+
+\subsection{Falsifiability concern 73}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~73.
+
+
+\subsection{Falsifiability concern 74}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~74.
+
+
+\subsection{Falsifiability concern 75}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~75.
+
+
+\subsection{Falsifiability concern 76}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~76.
+
+
+\subsection{Falsifiability concern 77}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~77.
+
+
+\subsection{Falsifiability concern 78}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~78.
+
+
+\subsection{Falsifiability concern 79}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~79.
+
+
+\subsection{Falsifiability concern 80}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~80.
+
+
+\subsection{Falsifiability concern 81}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~81.
+
+
+\subsection{Falsifiability concern 82}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~82.
+
+
+\subsection{Falsifiability concern 83}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~83.
+
+
+\subsection{Falsifiability concern 84}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~84.
+
+
+\subsection{Falsifiability concern 85}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~85.
+
+
+\subsection{Falsifiability concern 86}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$, where the off-phase probability is
+$p_{\mathrm{off}} \approx 0.5$. For $L = 256$ tokens this yields
+$\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~86.
+
+
+\subsection{Falsifiability concern 87}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~87.
+
+
+\subsection{Falsifiability concern 88}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~88.
+
+
+\subsection{Falsifiability concern 89}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~89.
+
+
+\subsection{Falsifiability concern 90}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~90.
+
+
+\subsection{Falsifiability concern 91}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~91.
+
+
+\subsection{Falsifiability concern 92}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~92.
+
+
+\subsection{Falsifiability concern 93}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~93.
+
+
+\subsection{Falsifiability concern 94}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~94.
+
+
+\subsection{Falsifiability concern 95}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~95.
+
+
+\subsection{Falsifiability concern 96}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~96.
+
+
+\subsection{Falsifiability concern 97}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~97.
+
+
+\subsection{Falsifiability concern 98}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~98.
+
+
+\subsection{Falsifiability concern 99}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~99.
+
+
+\subsection{Falsifiability concern 100}
+
+A natural concern with stochastic time-skip is the possible accumulation
+of accuracy drift across many skipped cycles in a long sequence. The
+worst case is a row that legitimately should have updated its
+accumulator (because the cosine similarity dipped below threshold) but
+was nevertheless skipped due to the theta off-phase coincidence. Under
+the calibrated threshold $0.94$, the probability of such a miss is
+bounded by $1 - 0.94 = 0.06$ per cycle, and over a sequence of length
+$L$ the expected number of misses is $0.06 \cdot L \cdot p_{\mathrm{off}}
+\approx 0.03 L$. For $L = 256$ tokens this yields $\sim 7.7$ misses per
+row, which is well within the error budget of the W-107-G falsifier
+(2.5~pp). Concern level: low. \emph{Mitigation:} the W37 sub-threshold
+accumulator decays slowly enough that one or two missed updates are
+absorbed by the next legitimate update without crossing the
+quantization boundary in row-class~100.
+
+
+\section{Accuracy bound}
+
+\begin{theorem}[Accuracy Bound under W-107-G]
+\label{thm:108-3-accuracy}
+Under the assumption R5 of bounded cosine-similarity calibration error
+(the \textsc{cal-2026} suite reports a maximum threshold-violation
+probability of $0.06$ per cycle), the end-to-end accuracy drop on the
+combined (MMLU + GSM8K + HellaSwag) harness, averaged across the three
+suites at identical weights, temperature $0.0$, and deterministic seeds,
+is bounded by $\Delta \le 2.5$~percentage points.
+\end{theorem}
+
+\begin{proof}
+Sketch under R5. The per-cycle threshold-violation probability $0.06$
+combines multiplicatively with the off-phase probability $0.5$ to yield
+a per-cycle miss probability of $0.03$. Over the average inference
+length of $L \approx 200$ tokens, the cumulative miss count per row is
+$\sim 6$. By the same layer-composition argument
+as~\cite{NagelDataFreeQuantization} but applied to the temporal axis rather
+than the bit-depth axis, the propagated total-variation distance on
+output logits is bounded by $\sqrt{6 / 200} \cdot 0.158 \approx 0.027$,
+which translates to no more than $\sim 2.5$~pp of accuracy drop on the
+three-suite harness. The pre-registered witness W-107-G fixes the
+falsifier at exactly $2.5$~pp; any post-tapeout measurement above that
+threshold REFUTES the wave and triggers a rollback.
+\end{proof}
+
+
+\section{Falsification surface}
+
+The pre-registered witness W-107-G commits the wave to the following
+falsifier:
+
+\begin{quote}
+If the three-suite averaged accuracy drop measured on TTIHP27a silicon
+at the freeze date 2027-02-15 exceeds $2.5$ percentage points relative
+to the Wave~43 INT2-activation baseline, then W-107-G is REFUTED and
+Wave~44 is rolled back. Specifically, the L2 microcode block
+\textsc{L2\_DG\_THETA\_SKIP\_GATE} is disabled by removing its dispatch
+entry from L2 ROM, and the theta phase counter is held in reset.
+\end{quote}
+
+\section{Discussion}
+
+Wave~44 is the third consecutive no-opcode wave (after W42 MoE Routing
+and W43 INT2 Activation). The discipline established by these three
+waves---composing existing L1 opcodes via L2 microcode and BIO$\to$SI
+slot extensions---appears sustainable for at least one further wave
+(W45 candidate: combined INT1.58 + theta-skip co-design). The sacred
+chain \texttt{0xD0..0xEF} remains FROZEN under R18.
+
+\section{Future work}
+
+A Wave~45 candidate would combine INT1.58 activations (one trit per
+neuron, three-level codebook) with theta-skip and would require a new
+BIO$\to$SI slot beyond hippocampal-theta-7Hz, likely the
+cerebellum-Purkinje-Lugaro circuit~\cite{IfftCerebellumComputation}.
+This is left for future work.
+
+\bibliographystyle{plain}