diff --git a/Cargo.toml b/Cargo.toml index b1b6e922..22e286a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,7 @@ opt-level = "z" lto = true panic = "abort" strip = true + +[profile.bench] +debug = true +strip = false diff --git a/package.json b/package.json index 136ce152..72bd4a02 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,6 @@ "packageManager": "pnpm@10.6.0", "devDependencies": { "@playwright/test": "^1.58.1", - "npm-run-all": "^4.1.5", "prettier": "^3.8.1", "ts-protoc-gen": "^0.15.0", "typescript": "^5.9.3" @@ -22,8 +21,8 @@ "rust-needs-format": "cargo fmt -- --check", "js-format": "find src -name '*.ts' -o -name '*.tsx' | egrep -v '/(lib(\\.(browser|module))?|core)/' | xargs prettier --write", "rust-format": "cargo fmt", - "format": "npm-run-all -p js-format rust-format", - "precommit": "npm-run-all -p js-needs-format rust-needs-format lint", + "format": "cargo fmt && pnpm js-format", + "precommit": "pnpm js-needs-format && pnpm rust-needs-format && pnpm lint", "install-git-hooks": "cd .git/hooks && rm -f pre-commit && ln -s ../../scripts/pre-commit ./pre-commit", "lint": "pnpm rust-lint && pnpm -r run lint", "tsc": "pnpm --filter @simlin/core exec tsc --noEmit && pnpm --filter @simlin/diagram exec tsc --noEmit && pnpm --filter @simlin/server exec tsc --noEmit && pnpm --filter @simlin/app exec tsc --noEmit", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f35cd394..a21d31fb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,9 +11,6 @@ importers: '@playwright/test': specifier: ^1.58.1 version: 1.58.1 - npm-run-all: - specifier: ^4.1.5 - version: 4.1.5 prettier: specifier: ^3.8.1 version: 3.8.1 @@ -2788,10 +2785,6 @@ packages: resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==} engines: {node: '>=12'} - ansi-styles@3.2.1: - resolution: {integrity: sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==} - engines: {node: 
'>=4'} - ansi-styles@4.3.0: resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} engines: {node: '>=8'} @@ -3145,10 +3138,6 @@ packages: ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} - chalk@2.4.2: - resolution: {integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==} - engines: {node: '>=4'} - chalk@4.1.2: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} @@ -3266,9 +3255,6 @@ packages: collect-v8-coverage@1.0.3: resolution: {integrity: sha512-1L5aqIkwPfiodaMgQunkF1zRhNqifHBmtbbbxcr6yVxxBnliw4TDOW6NxpO8DJLgJ16OT+Y4ztZqP6p/FtXnAw==} - color-convert@1.9.3: - resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==} - color-convert@2.0.1: resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} engines: {node: '>=7.0.0'} @@ -3277,9 +3263,6 @@ packages: resolution: {integrity: sha512-fasDH2ont2GqF5HpyO4w0+BcewlhHEZOFn9c1ckZdHpJ56Qb7MHhH/IcJZbBGgvdtwdwNbLvxiBEdg336iA9Sg==} engines: {node: '>=14.6'} - color-name@1.1.3: - resolution: {integrity: sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==} - color-name@1.1.4: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} @@ -3446,10 +3429,6 @@ packages: engines: {node: '>=10.14', npm: '>=6', yarn: '>=1'} hasBin: true - cross-spawn@6.0.6: - resolution: {integrity: sha512-VqCUuhcd1iB+dsv8gxPttb5iZh/D0iubSP21g36KXdEuf6I5JiioesUVjpCdHV9MZRUfVFlvwtIUyPfxo5trtw==} - engines: {node: '>=4.8'} - cross-spawn@7.0.6: resolution: {integrity: 
sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -3814,10 +3793,6 @@ packages: escape-html@1.0.3: resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==} - escape-string-regexp@1.0.5: - resolution: {integrity: sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==} - engines: {node: '>=0.8.0'} - escape-string-regexp@2.0.0: resolution: {integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==} engines: {node: '>=8'} @@ -4409,10 +4384,6 @@ packages: resolution: {integrity: sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg==} engines: {node: '>= 0.4'} - has-flag@3.0.0: - resolution: {integrity: sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==} - engines: {node: '>=4'} - has-flag@4.0.0: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} @@ -4511,9 +4482,6 @@ packages: resolution: {integrity: sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==} engines: {node: '>=16.9.0'} - hosted-git-info@2.8.9: - resolution: {integrity: sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==} - hosted-git-info@7.0.2: resolution: {integrity: sha512-puUZAUKT5m8Zzvs72XWy3HtvVbTWljRE66cP60bxJzAqf2DgICo7lYTY2IHUmLnNpjYvw5bvmoHvPc0QO2a62w==} engines: {node: ^16.14.0 || >=18.0.0} @@ -5152,9 +5120,6 @@ packages: json-buffer@3.0.1: resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==} - json-parse-better-errors@1.0.2: - resolution: {integrity: sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==} 
- json-parse-even-better-errors@2.3.1: resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} @@ -5263,10 +5228,6 @@ packages: lines-and-columns@1.2.4: resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} - load-json-file@4.0.0: - resolution: {integrity: sha512-Kx8hMakjX03tiGTLAIdJ+lL0htKnXjEZN6hk/tozf/WOuYGdZBJrZ+rCJRbVCugsjB3jMLn9746NsQIf5VjBMw==} - engines: {node: '>=4'} - loader-runner@4.3.1: resolution: {integrity: sha512-IWqP2SCPhyVFTBtRcgMHdzlf9ul25NwaFx4wCEH/KjAXuuHY4yNjvPXsBokp8jCB936PyWRaPKUNh8NvylLp2Q==} engines: {node: '>=6.11.5'} @@ -5497,10 +5458,6 @@ packages: peerDependencies: tslib: '2' - memorystream@0.3.1: - resolution: {integrity: sha512-S3UwM3yj5mtUSEfP41UZmt/0SCoVYUcU1rkXv+BQ5Ig8ndL4sPoJNBUJERafdPb5jjHJGuMgytgKvKIf58XNBw==} - engines: {node: '>= 0.10.0'} - merge-descriptors@1.0.3: resolution: {integrity: sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==} @@ -5795,9 +5752,6 @@ packages: resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} engines: {node: '>= 0.4.0'} - nice-try@1.0.5: - resolution: {integrity: sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ==} - node-addon-api@7.1.1: resolution: {integrity: sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==} @@ -5843,9 +5797,6 @@ packages: engines: {node: ^20.17.0 || >=22.9.0} hasBin: true - normalize-package-data@2.5.0: - resolution: {integrity: sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==} - normalize-path@3.0.0: resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} engines: {node: '>=0.10.0'} @@ -5866,11 +5817,6 @@ packages: resolution: 
{integrity: sha512-nkc+3pIIhqHVQr085X9d2JzPzLyjzQS96zbruppqC9aZRm/x8xx6xhI98gHtsfELP2bE+loHq8ZaHFHhe+NauA==} engines: {node: ^16.14.0 || >=18.0.0} - npm-run-all@4.1.5: - resolution: {integrity: sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ==} - engines: {node: '>= 4'} - hasBin: true - npm-run-path@4.0.1: resolution: {integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==} engines: {node: '>=8'} @@ -6025,10 +5971,6 @@ packages: parse-entities@4.0.2: resolution: {integrity: sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==} - parse-json@4.0.0: - resolution: {integrity: sha512-aOIos8bujGN93/8Ox/jPLh7RwVnPEysynVFE+fQZyg6jKELEHwzgKdLRFHUgXJL6kylijVSBC4BvN9OmsB48Rw==} - engines: {node: '>=4'} - parse-json@5.2.0: resolution: {integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==} engines: {node: '>=8'} @@ -6071,10 +6013,6 @@ packages: resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} engines: {node: '>=0.10.0'} - path-key@2.0.1: - resolution: {integrity: sha512-fEHGKCSmUSDPv4uoj8AlD+joPlq3peND+HRYyxFz4KPw4z926S/b8rIuFs2FYJg3BwsxJf6A9/3eIdLaYC+9Dw==} - engines: {node: '>=4'} - path-key@3.1.1: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} engines: {node: '>=8'} @@ -6099,10 +6037,6 @@ packages: path-to-regexp@8.3.0: resolution: {integrity: sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==} - path-type@3.0.0: - resolution: {integrity: sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==} - engines: {node: '>=4'} - pause@0.0.1: resolution: {integrity: sha512-KG8UEiEVkR3wGEb4m5yZkVCzigAD+cVEJck2CzYZO37ZGJfctvVptVO192MwrtPhzONn6go8ylnOdMhKqi4nfg==} @@ 
-6164,15 +6098,6 @@ packages: resolution: {integrity: sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==} engines: {node: '>=12'} - pidtree@0.3.1: - resolution: {integrity: sha512-qQbW94hLHEqCg7nhby4yRC7G2+jYHY4Rguc2bjw7Uug4GIJuu1tvf2uHaZv5Q8zdt+WKJ6qK1FOI6amaWUo5FA==} - engines: {node: '>=0.10'} - hasBin: true - - pify@3.0.0: - resolution: {integrity: sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg==} - engines: {node: '>=4'} - pirates@4.0.7: resolution: {integrity: sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==} engines: {node: '>= 6'} @@ -6471,10 +6396,6 @@ packages: resolution: {integrity: sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==} engines: {node: '>=0.10.0'} - read-pkg@3.0.0: - resolution: {integrity: sha512-BLq/cCO9two+lBgiTYNqD6GdtK8s4NpaWrl6/rCO9w0TUS8oJl7cmToOZfRYllKTISY6nt1U7jQ53brmKqY6BA==} - engines: {node: '>=4'} - readable-stream@2.3.8: resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==} @@ -6866,26 +6787,14 @@ packages: shallowequal@1.1.0: resolution: {integrity: sha512-y0m1JoUZSlPAjXVtPPW70aZWfIL/dSP7AFkRnniLCrK/8MDKog3TySTBmckD+RObVxH0v4Tox67+F14PdED2oQ==} - shebang-command@1.2.0: - resolution: {integrity: sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg==} - engines: {node: '>=0.10.0'} - shebang-command@2.0.0: resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} engines: {node: '>=8'} - shebang-regex@1.0.0: - resolution: {integrity: sha512-wpoSFAxys6b2a2wHZ1XpDSgD7N9iVjg29Ph9uV/uaP9Ex/KXlkTZTeddxDPSYQpgvzKLGJke2UU0AzoGCjNIvQ==} - engines: {node: '>=0.10.0'} - shebang-regex@3.0.0: resolution: {integrity: 
sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} engines: {node: '>=8'} - shell-quote@1.8.3: - resolution: {integrity: sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==} - engines: {node: '>= 0.4'} - shimmer@1.2.1: resolution: {integrity: sha512-sQTKC1Re/rM6XyFM6fIAGHRPVGvyXfgzIDvzoq608vM+jeyVD0Tu1E6Np0Kc2zAIFWIj963V2800iF/9LPieQw==} @@ -6980,18 +6889,6 @@ packages: space-separated-tokens@2.0.2: resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==} - spdx-correct@3.2.0: - resolution: {integrity: sha512-kN9dJbvnySHULIluDHy32WHRUu3Og7B9sbY7tsFLctQkIqnMh3hErYgdMjTYuqmcXX+lK5T1lnUt3G7zNswmZA==} - - spdx-exceptions@2.5.0: - resolution: {integrity: sha512-PiU42r+xO4UbUS1buo3LPJkjlO7430Xn5SVAhdpzzsPHsjbYVflnnFdATgabnLude+Cqu25p6N+g2lw/PFsa4w==} - - spdx-expression-parse@3.0.1: - resolution: {integrity: sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==} - - spdx-license-ids@3.0.22: - resolution: {integrity: sha512-4PRT4nh1EImPbt2jASOKHX7PB7I+e4IWNLvkKFDxNhJlfjbYlleYQh285Z/3mPTHSAK/AvdMmw5BNNuYH8ShgQ==} - split2@4.2.0: resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} engines: {node: '>= 10.x'} @@ -7070,10 +6967,6 @@ packages: resolution: {integrity: sha512-6CC9uyBL+/48dYizRf7H7VAYCMCNTBeM78x/VTUe9bFEaxBepPJDa1Ow99LqI/1yF7kuy7Q3cQsYMrcjGUcskA==} engines: {node: '>= 0.4'} - string.prototype.padend@3.1.6: - resolution: {integrity: sha512-XZpspuSB7vJWhvJc9DLSlrXl1mcA2BdoY5jjnS135ydXqLoqhs96JjDtCkjJEQHvfqZIp9hBuBMgI589peyx9Q==} - engines: {node: '>= 0.4'} - string.prototype.repeat@1.0.0: resolution: {integrity: sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w==} @@ -7144,10 +7037,6 @@ packages: engines: {node: 20 || 22 || 24} hasBin: true - 
supports-color@5.5.0: - resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==} - engines: {node: '>=4'} - supports-color@7.2.0: resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} engines: {node: '>=8'} @@ -7620,9 +7509,6 @@ packages: valid-url@1.0.9: resolution: {integrity: sha512-QQDsV8OnSf5Uc30CKSwG9lnhMPe6exHtTXLRYX8uMwKENy640pU+2BgBL0LRbDh/eYRahNCS7aewCx0wf3NYVA==} - validate-npm-package-license@3.0.4: - resolution: {integrity: sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==} - validate-npm-package-name@5.0.1: resolution: {integrity: sha512-OljLrQ9SQdOUqTaQxqL5dEfZWrXExyyWsozYlAWFawPVNuD83igl7uJD2RTkNMbniIYgt8l81eCJGIdQF7avLQ==} engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0} @@ -7737,10 +7623,6 @@ packages: resolution: {integrity: sha512-LYfpUkmqwl0h9A2HL09Mms427Q1RZWuOHsukfVcKRq9q95iQxdw0ix1JQrqbcDR9PH1QDwf5Qo8OZb5lksZ8Xg==} engines: {node: '>= 0.4'} - which@1.3.1: - resolution: {integrity: sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==} - hasBin: true - which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} @@ -10579,10 +10461,6 @@ snapshots: ansi-regex@6.2.2: {} - ansi-styles@3.2.1: - dependencies: - color-convert: 1.9.3 - ansi-styles@4.3.0: dependencies: color-convert: 2.0.1 @@ -11039,12 +10917,6 @@ snapshots: ccount@2.0.1: {} - chalk@2.4.2: - dependencies: - ansi-styles: 3.2.1 - escape-string-regexp: 1.0.5 - supports-color: 5.5.0 - chalk@4.1.2: dependencies: ansi-styles: 4.3.0 @@ -11153,10 +11025,6 @@ snapshots: collect-v8-coverage@1.0.3: {} - color-convert@1.9.3: - dependencies: - color-name: 1.1.3 - color-convert@2.0.1: dependencies: color-name: 1.1.4 @@ -11165,8 +11033,6 @@ snapshots: dependencies: color-name: 2.1.0 - 
color-name@1.1.3: {} - color-name@1.1.4: {} color-name@2.1.0: {} @@ -11337,14 +11203,6 @@ snapshots: dependencies: cross-spawn: 7.0.6 - cross-spawn@6.0.6: - dependencies: - nice-try: 1.0.5 - path-key: 2.0.1 - semver: 5.7.2 - shebang-command: 1.2.0 - which: 1.3.1 - cross-spawn@7.0.6: dependencies: path-key: 3.1.1 @@ -11761,8 +11619,6 @@ snapshots: escape-html@1.0.3: {} - escape-string-regexp@1.0.5: {} - escape-string-regexp@2.0.0: {} escape-string-regexp@4.0.0: {} @@ -12708,8 +12564,6 @@ snapshots: has-bigints@1.1.0: {} - has-flag@3.0.0: {} - has-flag@4.0.0: {} has-property-descriptors@1.0.2: @@ -12845,8 +12699,6 @@ snapshots: hono@4.11.7: {} - hosted-git-info@2.8.9: {} - hosted-git-info@7.0.2: dependencies: lru-cache: 10.4.3 @@ -13678,8 +13530,6 @@ snapshots: json-buffer@3.0.1: {} - json-parse-better-errors@1.0.2: {} - json-parse-even-better-errors@2.3.1: {} json-parse-helpfulerror@1.0.3: @@ -13796,13 +13646,6 @@ snapshots: lines-and-columns@1.2.4: {} - load-json-file@4.0.0: - dependencies: - graceful-fs: 4.2.11 - parse-json: 4.0.0 - pify: 3.0.0 - strip-bom: 3.0.0 - loader-runner@4.3.1: {} loader-utils@2.0.4: @@ -14142,8 +13985,6 @@ snapshots: tree-dump: 1.1.0(tslib@2.8.1) tslib: 2.8.1 - memorystream@0.3.1: {} - merge-descriptors@1.0.3: {} merge-descriptors@2.0.0: {} @@ -14573,8 +14414,6 @@ snapshots: netmask@2.0.2: {} - nice-try@1.0.5: {} - node-addon-api@7.1.1: optional: true @@ -14626,13 +14465,6 @@ snapshots: abbrev: 4.0.0 optional: true - normalize-package-data@2.5.0: - dependencies: - hosted-git-info: 2.8.9 - resolve: 1.22.11 - semver: 5.7.2 - validate-npm-package-license: 3.0.4 - normalize-path@3.0.0: {} npm-install-checks@6.3.0: @@ -14655,18 +14487,6 @@ snapshots: npm-package-arg: 11.0.3 semver: 7.7.3 - npm-run-all@4.1.5: - dependencies: - ansi-styles: 3.2.1 - chalk: 2.4.2 - cross-spawn: 6.0.6 - memorystream: 0.3.1 - minimatch: 3.1.2 - pidtree: 0.3.1 - read-pkg: 3.0.0 - shell-quote: 1.8.3 - string.prototype.padend: 3.1.6 - npm-run-path@4.0.1: dependencies: 
path-key: 3.1.1 @@ -14861,11 +14681,6 @@ snapshots: is-decimal: 2.0.1 is-hexadecimal: 2.0.1 - parse-json@4.0.0: - dependencies: - error-ex: 1.3.4 - json-parse-better-errors: 1.0.2 - parse-json@5.2.0: dependencies: '@babel/code-frame': 7.29.0 @@ -14906,8 +14721,6 @@ snapshots: path-is-absolute@1.0.1: {} - path-key@2.0.1: {} - path-key@3.1.1: {} path-parse@1.0.7: {} @@ -14931,10 +14744,6 @@ snapshots: path-to-regexp@8.3.0: {} - path-type@3.0.0: - dependencies: - pify: 3.0.0 - pause@0.0.1: {} pbkdf2@3.1.5: @@ -14997,10 +14806,6 @@ snapshots: picomatch@4.0.3: {} - pidtree@0.3.1: {} - - pify@3.0.0: {} - pirates@4.0.7: {} pkce-challenge@5.0.1: {} @@ -15310,12 +15115,6 @@ snapshots: react@19.2.4: {} - read-pkg@3.0.0: - dependencies: - load-json-file: 4.0.0 - normalize-package-data: 2.5.0 - path-type: 3.0.0 - readable-stream@2.3.8: dependencies: core-util-is: 1.0.3 @@ -15821,20 +15620,12 @@ snapshots: shallowequal@1.1.0: {} - shebang-command@1.2.0: - dependencies: - shebang-regex: 1.0.0 - shebang-command@2.0.0: dependencies: shebang-regex: 3.0.0 - shebang-regex@1.0.0: {} - shebang-regex@3.0.0: {} - shell-quote@1.8.3: {} - shimmer@1.2.1: {} side-channel-list@1.0.0: @@ -15945,20 +15736,6 @@ snapshots: space-separated-tokens@2.0.2: {} - spdx-correct@3.2.0: - dependencies: - spdx-expression-parse: 3.0.1 - spdx-license-ids: 3.0.22 - - spdx-exceptions@2.5.0: {} - - spdx-expression-parse@3.0.1: - dependencies: - spdx-exceptions: 2.5.0 - spdx-license-ids: 3.0.22 - - spdx-license-ids@3.0.22: {} - split2@4.2.0: {} sprintf-js@1.0.3: {} @@ -16062,13 +15839,6 @@ snapshots: set-function-name: 2.0.2 side-channel: 1.1.0 - string.prototype.padend@3.1.6: - dependencies: - call-bind: 1.0.8 - define-properties: 1.2.1 - es-abstract: 1.24.1 - es-object-atoms: 1.1.1 - string.prototype.repeat@1.0.0: dependencies: define-properties: 1.2.1 @@ -16164,10 +15934,6 @@ snapshots: - encoding - supports-color - supports-color@5.5.0: - dependencies: - has-flag: 3.0.0 - supports-color@7.2.0: dependencies: 
has-flag: 4.0.0 @@ -16712,11 +16478,6 @@ snapshots: valid-url@1.0.9: {} - validate-npm-package-license@3.0.4: - dependencies: - spdx-correct: 3.2.0 - spdx-expression-parse: 3.0.1 - validate-npm-package-name@5.0.1: {} varint@6.0.0: {} @@ -16878,10 +16639,6 @@ snapshots: gopd: 1.2.0 has-tostringtag: 1.0.2 - which@1.3.1: - dependencies: - isexe: 2.0.0 - which@2.0.2: dependencies: isexe: 2.0.0 diff --git a/src/simlin-engine/src/bytecode.rs b/src/simlin-engine/src/bytecode.rs index c23df5e8..8c349543 100644 --- a/src/simlin-engine/src/bytecode.rs +++ b/src/simlin-engine/src/bytecode.rs @@ -26,6 +26,21 @@ pub type DimId = u16; // Index into dimensions table pub type TempId = u8; // Temp array ID (max 256 temps per module) pub type PcOffset = i16; // Relative PC offset for jumps (signed for backward jumps) pub type NameId = u16; // Index into names table +pub type DimListId = u16; // Index into dim_lists table (for [DimId; 4] or [u16; 4]) + +/// Fixed capacity for the VM arithmetic stack. +/// +/// 64 is generous for system dynamics expressions: the stack depth equals the +/// maximum nesting depth of an expression tree. Even complex equations like +/// `IF(a > b AND c < d, MAX(e, f) * g + h, MIN(i, j) / k - l)` use ~5 slots. +/// The stack resets to 0 after every assignment opcode, so depth depends only on +/// expression complexity, not on model size. +/// +/// `ByteCodeBuilder::finish()` validates at compile time that no bytecode +/// sequence exceeds this capacity, making the VM's unsafe unchecked stack +/// access provably safe. The `#![deny(unsafe_code)]` crate attribute ensures +/// no other unsafe code can be added without explicit opt-in. +pub(crate) const STACK_CAPACITY: usize = 64; /// Lookup interpolation mode for graphical function tables. #[repr(u8)] @@ -529,7 +544,7 @@ pub(crate) enum Op2 { /// - Array iteration (BeginIter, LoadIterElement, etc.) /// - Array reductions (ArraySum, ArrayMax, etc.) 
#[cfg_attr(feature = "debug-derive", derive(Debug))] -#[derive(Clone)] +#[derive(Clone, Copy)] #[allow(dead_code)] // Array opcodes not yet emitted by compiler pub(crate) enum Opcode { // === ARITHMETIC & LOGIC === @@ -597,6 +612,28 @@ pub(crate) enum Opcode { mode: LookupMode, }, + // === SUPERINSTRUCTIONS (fused opcodes for common patterns) === + /// Fused LoadConstant + AssignCurr. + /// curr[module_off + off] = literals[literal_id]; stack unchanged. + AssignConstCurr { + off: VariableOffset, + literal_id: LiteralId, + }, + + /// Fused Op2 + AssignCurr. + /// Pops two values, applies binary op, assigns result to curr[module_off + off]. + BinOpAssignCurr { + op: Op2, + off: VariableOffset, + }, + + /// Fused Op2 + AssignNext. + /// Pops two values, applies binary op, assigns result to next[module_off + off]. + BinOpAssignNext { + op: Op2, + off: VariableOffset, + }, + // ========================================================================= // ARRAY SUPPORT (new) // ========================================================================= @@ -604,17 +641,17 @@ pub(crate) enum Opcode { // === VIEW STACK: Building views dynamically === /// Push a view for a variable's full array onto the view stack. /// Looks up dimension info to compute strides. + /// The dim_list_id references a (n_dims, [DimId; 4]) entry in ByteCodeContext.dim_lists. PushVarView { - base_off: VariableOffset, // Variable offset in curr[] - n_dims: u8, // Number of dimensions (1-4) - dim_ids: [DimId; 4], // Dimension IDs (padded with 0 if < 4) + base_off: VariableOffset, + dim_list_id: DimListId, }, /// Push a view for a temp array onto the view stack. + /// The dim_list_id references a (n_dims, [DimId; 4]) entry in ByteCodeContext.dim_lists. PushTempView { temp_id: TempId, - n_dims: u8, - dim_ids: [DimId; 4], + dim_list_id: DimListId, }, /// Push a pre-computed static view onto the view stack. 
@@ -624,10 +661,10 @@ pub(crate) enum Opcode { /// Push a view for a variable with explicit dimension sizes. /// Used when we have bounds but not dim_ids (e.g., dynamic subscripts). + /// The dim_list_id references a (n_dims, [u16; 4]) entry in ByteCodeContext.dim_lists. PushVarViewDirect { - base_off: VariableOffset, // Variable offset in curr[] - n_dims: u8, // Number of dimensions (1-4) - dims: [u16; 4], // Explicit dimension sizes (padded with 0 if < 4) + base_off: VariableOffset, + dim_list_id: DimListId, }, /// Apply single-element subscript with constant index to top view. @@ -785,6 +822,132 @@ pub(crate) enum Opcode { EndBroadcastIter {}, } +impl Opcode { + /// Returns the jump offset if this opcode is a backward jump instruction. + /// Centralizes jump handling so new jump opcodes can't be silently missed + /// by the peephole optimizer or other passes. + fn jump_offset(&self) -> Option { + match self { + Opcode::NextIterOrJump { jump_back } | Opcode::NextBroadcastOrJump { jump_back } => { + Some(*jump_back) + } + _ => None, + } + } + + /// Mutably borrow the jump offset, if this opcode is a backward jump. + fn jump_offset_mut(&mut self) -> Option<&mut PcOffset> { + match self { + Opcode::NextIterOrJump { jump_back } | Opcode::NextBroadcastOrJump { jump_back } => { + Some(jump_back) + } + _ => None, + } + } + + /// Returns (pops, pushes) describing this opcode's effect on the arithmetic stack. + /// Used by `ByteCode::max_stack_depth` to statically validate that compiled + /// bytecode cannot overflow the fixed-size VM stack. + /// + /// Opcodes that only affect the view stack, iter stack, or broadcast stack + /// return (0, 0) since they don't touch the arithmetic stack. + fn stack_effect(&self) -> (u8, u8) { + match self { + // Arithmetic: pop 2, push 1 + Opcode::Op2 { .. } => (2, 1), + // Logic: pop 1, push 1 + Opcode::Not {} => (1, 1), + + // Constants/variables: push 1 + Opcode::LoadConstant { .. } + | Opcode::LoadVar { .. 
} + | Opcode::LoadGlobalVar { .. } + | Opcode::LoadModuleInput { .. } => (0, 1), + + // Legacy subscript: PushSubscriptIndex pops an index from the + // arithmetic stack and appends it to a separate subscript_index + // SmallVec (not the arithmetic stack). Multiple PushSubscriptIndex + // ops may precede a single LoadSubscript for multi-dimensional + // access, but each only pops 1 from the arithmetic stack. + Opcode::PushSubscriptIndex { .. } => (1, 0), + // LoadSubscript consumes the accumulated subscript_index entries + // and pushes the looked-up value onto the arithmetic stack. + Opcode::LoadSubscript { .. } => (0, 1), + + // Control flow + Opcode::SetCond {} => (1, 0), // pops condition + Opcode::If {} => (2, 1), // pops true+false branches, pushes result + Opcode::Ret => (0, 0), + + // Module eval: pops n_inputs from the caller's arithmetic stack. + // The child module executes with its own stack context (via EvalState) + // and writes results directly to curr/next, not back to the caller's + // arithmetic stack, so pushes = 0 from the caller's perspective. + Opcode::EvalModule { n_inputs, .. } => (*n_inputs, 0), + + // Assignment: pops 1 (the value to assign) + Opcode::AssignCurr { .. } | Opcode::AssignNext { .. } => (1, 0), + + // Builtins always take 3 args (actual + padding), push 1 result + Opcode::Apply { .. } => (3, 1), + // Lookup pops element_offset and lookup_index, pushes result + Opcode::Lookup { .. } => (2, 1), + + // Superinstructions + Opcode::AssignConstCurr { .. } => (0, 0), // reads literal directly + Opcode::BinOpAssignCurr { .. } => (2, 0), // pops 2, assigns directly + Opcode::BinOpAssignNext { .. } => (2, 0), // pops 2, assigns directly + + // View stack ops don't touch arithmetic stack + Opcode::PushVarView { .. } + | Opcode::PushTempView { .. } + | Opcode::PushStaticView { .. } + | Opcode::PushVarViewDirect { .. } + | Opcode::ViewSubscriptConst { .. } + | Opcode::ViewRange { .. } + | Opcode::ViewStarRange { .. 
} + | Opcode::ViewWildcard { .. } + | Opcode::ViewTranspose {} + | Opcode::PopView {} + | Opcode::DupView {} => (0, 0), + + // Dynamic subscript/range ops pop from arithmetic stack + Opcode::ViewSubscriptDynamic { .. } => (1, 0), + Opcode::ViewRangeDynamic { .. } => (2, 0), + + // Temp array access + Opcode::LoadTempConst { .. } => (0, 1), + Opcode::LoadTempDynamic { .. } => (1, 1), // pops index, pushes value + + // Iteration: BeginIter/EndIter don't touch arithmetic stack + Opcode::BeginIter { .. } | Opcode::EndIter {} => (0, 0), + // LoadIter* push 1 element + Opcode::LoadIterElement {} + | Opcode::LoadIterTempElement { .. } + | Opcode::LoadIterViewTop {} + | Opcode::LoadIterViewAt { .. } => (0, 1), + // StoreIterElement pops 1 value + Opcode::StoreIterElement {} => (1, 0), + // NextIter doesn't touch arithmetic stack + Opcode::NextIterOrJump { .. } => (0, 0), + + // Array reductions push 1 result + Opcode::ArraySum {} + | Opcode::ArrayMax {} + | Opcode::ArrayMin {} + | Opcode::ArrayMean {} + | Opcode::ArrayStddev {} + | Opcode::ArraySize {} => (0, 1), + + // Broadcasting + Opcode::BeginBroadcastIter { .. } | Opcode::EndBroadcastIter {} => (0, 0), + Opcode::LoadBroadcastElement { .. } => (0, 1), + Opcode::StoreBroadcastElement {} => (1, 0), + Opcode::NextBroadcastOrJump { .. } => (0, 0), + } + } +} + // ============================================================================ // Module and Array Declarations // ============================================================================ @@ -878,6 +1041,11 @@ pub struct ByteCodeContext { pub(crate) temp_offsets: Vec, /// Total size needed for temp_storage pub(crate) temp_total_size: usize, + + // === Dim list side table === + /// Packed (n_dims, [DimId or u16; 4]) entries referenced by DimListId. + /// Each entry stores the dimension count and up to 4 IDs. 
+ pub(crate) dim_lists: Vec<(u8, [u16; 4])>, } #[allow(dead_code)] // Methods used by array bytecode not yet emitted @@ -942,6 +1110,23 @@ impl ByteCodeContext { } None } + + /// Add a dim list entry (n_dims + up to 4 IDs) and return its DimListId. + pub fn add_dim_list(&mut self, n_dims: u8, ids: [u16; 4]) -> DimListId { + self.dim_lists.push((n_dims, ids)); + (self.dim_lists.len() - 1) as DimListId + } + + /// Get a dim list entry by ID. + /// + /// Panics on out-of-bounds ID, which is intentional: IDs are only produced + /// by `add_dim_list` during compilation, so an invalid ID indicates a + /// compiler bug that should surface immediately rather than be silently + /// converted to a default value. + pub fn get_dim_list(&self, id: DimListId) -> (u8, &[u16; 4]) { + let (n, ref ids) = self.dim_lists[id as usize]; + (n, ids) + } } #[cfg_attr(feature = "debug-derive", derive(Debug))] @@ -951,6 +1136,32 @@ pub struct ByteCode { pub(crate) code: Vec, } +impl ByteCode { + /// Statically compute the maximum arithmetic stack depth reached by this bytecode. + /// + /// Walks the opcode stream applying each instruction's stack effect. Because + /// SD expressions are straight-line (no conditional jumps that could create + /// divergent stack depths -- backward jumps from iteration opcodes always + /// return to the same stack depth), a single linear pass is sufficient. + pub(crate) fn max_stack_depth(&self) -> usize { + let mut depth: usize = 0; + let mut max_depth: usize = 0; + for (pc, op) in self.code.iter().enumerate() { + let (pops, pushes) = op.stack_effect(); + // Use checked_sub rather than saturating_sub: an underflow here + // means stack_effect() metadata is wrong for some opcode, which + // would silently invalidate our safety proof. Panicking surfaces + // the bug immediately in tests. 
+ depth = depth.checked_sub(pops as usize).unwrap_or_else(|| { + panic!("stack_effect underflow at pc {pc}: {pops} pops but depth is {depth}") + }); + depth += pushes as usize; + max_depth = max_depth.max(depth); + } + max_depth + } +} + #[cfg_attr(feature = "debug-derive", derive(Debug))] #[derive(Clone, Default)] pub struct ByteCodeBuilder { @@ -980,7 +1191,113 @@ impl ByteCodeBuilder { } pub(crate) fn finish(self) -> ByteCode { - self.bytecode + let mut bc = self.bytecode; + bc.peephole_optimize(); + + // Validate that the compiled bytecode cannot overflow the VM's + // fixed-size stack. This makes the unsafe unchecked stack access + // in the VM provably safe for this bytecode. + let depth = bc.max_stack_depth(); + assert!( + depth < STACK_CAPACITY, + "compiled bytecode requires stack depth {depth}, exceeding VM capacity {STACK_CAPACITY}" + ); + + bc + } +} + +impl ByteCode { + /// Peephole optimization pass: fuse common opcode sequences into + /// superinstructions to reduce dispatch overhead. + /// + /// Only fuses adjacent instructions when neither is a jump target. + /// Jump offsets are recalculated after fusion using an old->new PC map. + fn peephole_optimize(&mut self) { + if self.code.is_empty() { + return; + } + + // 1. Build set of PCs that are jump targets + let mut jump_targets = vec![false; self.code.len()]; + for (pc, op) in self.code.iter().enumerate() { + if let Some(offset) = op.jump_offset() { + let target = (pc as isize + offset as isize) as usize; + assert!( + target < jump_targets.len(), + "jump at pc {pc} targets {target}, which is out of bounds (code length: {})", + self.code.len() + ); + jump_targets[target] = true; + } + } + + // 2. Build old_pc -> new_pc mapping and fused output. + // pc_map has one entry per original instruction so that jump fixup + // can index by the original PC directly. 
+ let mut optimized: Vec = Vec::with_capacity(self.code.len()); + let mut pc_map: Vec = Vec::with_capacity(self.code.len() + 1); + let mut i = 0; + while i < self.code.len() { + let new_pc = optimized.len(); + pc_map.push(new_pc); + + // Only try fusion if the next instruction is not a jump target. + // We intentionally don't check whether instruction i itself is a + // jump target: the fused instruction replaces both i and i+1 at the + // same PC, so jumps to i still land on the correct (fused) opcode. + let can_fuse = i + 1 < self.code.len() && !jump_targets[i + 1]; + + if can_fuse { + let fused = match (&self.code[i], &self.code[i + 1]) { + // Pattern: LoadConstant + AssignCurr -> AssignConstCurr + (Opcode::LoadConstant { id }, Opcode::AssignCurr { off }) => { + Some(Opcode::AssignConstCurr { + off: *off, + literal_id: *id, + }) + } + // Pattern: Op2 + AssignCurr -> BinOpAssignCurr + (Opcode::Op2 { op }, Opcode::AssignCurr { off }) => { + Some(Opcode::BinOpAssignCurr { op: *op, off: *off }) + } + // Pattern: Op2 + AssignNext -> BinOpAssignNext + (Opcode::Op2 { op }, Opcode::AssignNext { off }) => { + Some(Opcode::BinOpAssignNext { op: *op, off: *off }) + } + _ => None, + }; + + if let Some(op) = fused { + optimized.push(op); + // Both old PCs map to the same new PC + pc_map.push(new_pc); + i += 2; + continue; + } + } + + // No pattern matched - copy opcode as-is + optimized.push(self.code[i]); + i += 1; + } + // Sentinel for instructions past the end + pc_map.push(optimized.len()); + + // 3. Fix up jump offsets. Iterate original code to find jumps, + // then use pc_map (indexed by old_pc) for O(1) translation. 
+ for (old_pc, op) in self.code.iter().enumerate() { + let Some(jump_back) = op.jump_offset() else { + continue; + }; + let new_pc = pc_map[old_pc]; + let old_target = (old_pc as isize + jump_back as isize) as usize; + let new_target = pc_map[old_target]; + let new_jump_back = (new_target as isize - new_pc as isize) as PcOffset; + *optimized[new_pc].jump_offset_mut().unwrap() = new_jump_back; + } + + self.code = optimized; } } @@ -1012,14 +1329,396 @@ mod tests { assert_eq!(2, bytecode.literals.len()); } + // ========================================================================= + // Stack Effect Tests + // ========================================================================= + + #[test] + fn test_stack_effect_arithmetic_ops() { + // Binary ops: pop 2, push 1 + assert_eq!((Opcode::Op2 { op: Op2::Add }).stack_effect(), (2, 1)); + assert_eq!((Opcode::Op2 { op: Op2::Mul }).stack_effect(), (2, 1)); + assert_eq!((Opcode::Op2 { op: Op2::Gt }).stack_effect(), (2, 1)); + + // Unary not: pop 1, push 1 + assert_eq!((Opcode::Not {}).stack_effect(), (1, 1)); + } + + #[test] + fn test_stack_effect_loads() { + assert_eq!((Opcode::LoadConstant { id: 0 }).stack_effect(), (0, 1)); + assert_eq!((Opcode::LoadVar { off: 0 }).stack_effect(), (0, 1)); + assert_eq!((Opcode::LoadGlobalVar { off: 0 }).stack_effect(), (0, 1)); + assert_eq!( + (Opcode::LoadModuleInput { input: 0 }).stack_effect(), + (0, 1) + ); + } + + #[test] + fn test_stack_effect_assignments() { + assert_eq!((Opcode::AssignCurr { off: 0 }).stack_effect(), (1, 0)); + assert_eq!((Opcode::AssignNext { off: 0 }).stack_effect(), (1, 0)); + } + + #[test] + fn test_stack_effect_superinstructions() { + assert_eq!( + (Opcode::AssignConstCurr { + off: 0, + literal_id: 0 + }) + .stack_effect(), + (0, 0) + ); + assert_eq!( + (Opcode::BinOpAssignCurr { + op: Op2::Add, + off: 0 + }) + .stack_effect(), + (2, 0) + ); + assert_eq!( + (Opcode::BinOpAssignNext { + op: Op2::Add, + off: 0 + }) + .stack_effect(), + (2, 0) + ); + } + + 
#[test] + fn test_stack_effect_builtins() { + assert_eq!( + (Opcode::Apply { + func: BuiltinId::Abs + }) + .stack_effect(), + (3, 1) + ); + assert_eq!( + (Opcode::Lookup { + base_gf: 0, + table_count: 1, + mode: LookupMode::Interpolate, + }) + .stack_effect(), + (2, 1) + ); + } + + #[test] + fn test_stack_effect_control_flow() { + assert_eq!((Opcode::SetCond {}).stack_effect(), (1, 0)); + assert_eq!((Opcode::If {}).stack_effect(), (2, 1)); + assert_eq!(Opcode::Ret.stack_effect(), (0, 0)); + } + + #[test] + fn test_stack_effect_eval_module() { + assert_eq!( + (Opcode::EvalModule { id: 0, n_inputs: 3 }).stack_effect(), + (3, 0) + ); + assert_eq!( + (Opcode::EvalModule { id: 0, n_inputs: 0 }).stack_effect(), + (0, 0) + ); + } + + #[test] + fn test_stack_effect_view_ops_dont_affect_arithmetic_stack() { + assert_eq!( + (Opcode::PushVarView { + base_off: 0, + dim_list_id: 0, + }) + .stack_effect(), + (0, 0) + ); + assert_eq!((Opcode::PopView {}).stack_effect(), (0, 0)); + assert_eq!((Opcode::DupView {}).stack_effect(), (0, 0)); + assert_eq!( + (Opcode::ViewSubscriptConst { + dim_idx: 0, + index: 0, + }) + .stack_effect(), + (0, 0) + ); + } + + #[test] + fn test_stack_effect_dynamic_view_ops_pop_from_arithmetic_stack() { + assert_eq!( + (Opcode::ViewSubscriptDynamic { dim_idx: 0 }).stack_effect(), + (1, 0) + ); + assert_eq!( + (Opcode::ViewRangeDynamic { dim_idx: 0 }).stack_effect(), + (2, 0) + ); + } + + #[test] + fn test_stack_effect_iteration() { + assert_eq!( + (Opcode::BeginIter { + write_temp_id: 0, + has_write_temp: false, + }) + .stack_effect(), + (0, 0) + ); + assert_eq!((Opcode::LoadIterElement {}).stack_effect(), (0, 1)); + assert_eq!((Opcode::StoreIterElement {}).stack_effect(), (1, 0)); + assert_eq!((Opcode::EndIter {}).stack_effect(), (0, 0)); + } + + #[test] + fn test_stack_effect_array_reductions() { + assert_eq!((Opcode::ArraySum {}).stack_effect(), (0, 1)); + assert_eq!((Opcode::ArrayMax {}).stack_effect(), (0, 1)); + assert_eq!((Opcode::ArrayMin 
{}).stack_effect(), (0, 1)); + assert_eq!((Opcode::ArrayMean {}).stack_effect(), (0, 1)); + assert_eq!((Opcode::ArrayStddev {}).stack_effect(), (0, 1)); + assert_eq!((Opcode::ArraySize {}).stack_effect(), (0, 1)); + } + + // ========================================================================= + // Max Stack Depth Tests + // ========================================================================= + + #[test] + fn test_max_stack_depth_empty() { + let bc = ByteCode::default(); + assert_eq!(bc.max_stack_depth(), 0); + } + + #[test] + fn test_max_stack_depth_simple_assignment() { + // x = 42.0: LoadConstant(42.0), AssignCurr(x) + let bc = ByteCode { + literals: vec![42.0], + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::AssignCurr { off: 0 }, + ], + }; + assert_eq!(bc.max_stack_depth(), 1); + } + + #[test] + fn test_max_stack_depth_binary_expression() { + // x = a + b: LoadVar(a), LoadVar(b), Op2(Add), AssignCurr(x) + let bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Op2 { op: Op2::Add }, + Opcode::AssignCurr { off: 2 }, + ], + }; + assert_eq!(bc.max_stack_depth(), 2); + } + + #[test] + fn test_max_stack_depth_nested_expression() { + // x = (a + b) * (c + d): + // LoadVar(a), LoadVar(b), Op2(Add), LoadVar(c), LoadVar(d), Op2(Add), Op2(Mul), AssignCurr + // Peak depth is 3: after loading c while (a+b) result is still on stack + let bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, // depth: 1 + Opcode::LoadVar { off: 1 }, // depth: 2 + Opcode::Op2 { op: Op2::Add }, // depth: 1 + Opcode::LoadVar { off: 2 }, // depth: 2 + Opcode::LoadVar { off: 3 }, // depth: 3 (peak) + Opcode::Op2 { op: Op2::Add }, // depth: 2 + Opcode::Op2 { op: Op2::Mul }, // depth: 1 + Opcode::AssignCurr { off: 4 }, // depth: 0 + ], + }; + assert_eq!(bc.max_stack_depth(), 3); + } + + #[test] + fn test_max_stack_depth_builtin_function() { + // x = ABS(a): LoadVar(a), LoadConstant(0), 
LoadConstant(0), Apply(Abs), AssignCurr + let bc = ByteCode { + literals: vec![0.0], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadConstant { id: 0 }, + Opcode::LoadConstant { id: 0 }, + Opcode::Apply { + func: BuiltinId::Abs, + }, + Opcode::AssignCurr { off: 1 }, + ], + }; + assert_eq!(bc.max_stack_depth(), 3); + } + + #[test] + fn test_max_stack_depth_if_expression() { + // IF(cond, a, b): LoadVar(cond), SetCond, LoadVar(a), LoadVar(b), If, AssignCurr + let bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, // depth: 1 + Opcode::SetCond {}, // depth: 0 + Opcode::LoadVar { off: 1 }, // depth: 1 + Opcode::LoadVar { off: 2 }, // depth: 2 + Opcode::If {}, // depth: 1 + Opcode::AssignCurr { off: 3 }, // depth: 0 + ], + }; + assert_eq!(bc.max_stack_depth(), 2); + } + + #[test] + fn test_max_stack_depth_superinstruction_const_assign() { + // AssignConstCurr doesn't use the stack at all + let bc = ByteCode { + literals: vec![42.0], + code: vec![Opcode::AssignConstCurr { + off: 0, + literal_id: 0, + }], + }; + assert_eq!(bc.max_stack_depth(), 0); + } + + #[test] + fn test_max_stack_depth_multiple_assignments() { + // x = a; y = b + c + // Stack resets to 0 after each assignment, so peak is max of individual expressions + let bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::AssignCurr { off: 3 }, + Opcode::LoadVar { off: 1 }, + Opcode::LoadVar { off: 2 }, + Opcode::Op2 { op: Op2::Add }, + Opcode::AssignCurr { off: 4 }, + ], + }; + assert_eq!(bc.max_stack_depth(), 2); + } + + #[test] + fn test_max_stack_depth_with_iteration() { + // Iteration body: LoadIterElement, StoreIterElement -- each iteration + // pushes 1 and pops 1, so peak depth within loop is 1 + let bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::BeginIter { + write_temp_id: 0, + has_write_temp: true, + }, + Opcode::LoadIterElement {}, + Opcode::StoreIterElement {}, + Opcode::NextIterOrJump { jump_back: -2 }, + 
Opcode::EndIter {}, + ], + }; + assert_eq!(bc.max_stack_depth(), 1); + } + + #[test] + fn test_max_stack_depth_multidimensional_subscript() { + // a[i, j]: two PushSubscriptIndex (each pops 1 index from the arithmetic + // stack, writing to a separate subscript_index SmallVec), then LoadSubscript + // pushes the result. The indices must be loaded before being popped. + // LoadVar(i), PushSubscriptIndex, LoadVar(j), PushSubscriptIndex, LoadSubscript, Assign + let bc = ByteCode { + literals: vec![], + code: vec![ + Opcode::LoadVar { off: 0 }, // depth: 1 (load index i) + Opcode::PushSubscriptIndex { bounds: 3 }, // depth: 0 (pop i) + Opcode::LoadVar { off: 1 }, // depth: 1 (load index j) + Opcode::PushSubscriptIndex { bounds: 4 }, // depth: 0 (pop j) + Opcode::LoadSubscript { off: 10 }, // depth: 1 (push result) + Opcode::AssignCurr { off: 20 }, // depth: 0 + ], + }; + assert_eq!(bc.max_stack_depth(), 1); + } + + #[test] + fn test_finish_validates_stack_depth() { + // Build bytecode that fits within STACK_CAPACITY -- should succeed + let mut builder = ByteCodeBuilder::default(); + let id = builder.intern_literal(1.0); + builder.push_opcode(Opcode::LoadConstant { id }); + builder.push_opcode(Opcode::AssignCurr { off: 0 }); + let _bc = builder.finish(); // should not panic + } + + #[test] + #[should_panic(expected = "stack_effect underflow at pc 0")] + fn test_max_stack_depth_catches_underflow() { + // An Op2 at the start with nothing on the stack should panic, + // catching bugs in stack_effect metadata + let bc = ByteCode { + literals: vec![], + code: vec![Opcode::Op2 { op: Op2::Add }], + }; + bc.max_stack_depth(); + } + + #[test] + #[should_panic(expected = "jump at pc 0 targets")] + fn test_peephole_panics_on_out_of_bounds_jump_target() { + // A jump that targets beyond the code length indicates a compiler bug + let mut bc = ByteCode { + literals: vec![], + code: vec![Opcode::NextIterOrJump { jump_back: 10 }], + }; + bc.peephole_optimize(); + } + + // 
========================================================================= + // Jump Offset Tests + // ========================================================================= + + #[test] + fn test_jump_offset_returns_offset_for_jump_opcodes() { + let iter_jump = Opcode::NextIterOrJump { jump_back: -5 }; + assert_eq!(iter_jump.jump_offset(), Some(-5)); + + let broadcast_jump = Opcode::NextBroadcastOrJump { jump_back: -3 }; + assert_eq!(broadcast_jump.jump_offset(), Some(-3)); + + assert_eq!(Opcode::Ret.jump_offset(), None); + assert_eq!((Opcode::Op2 { op: Op2::Add }).jump_offset(), None); + assert_eq!((Opcode::LoadVar { off: 0 }).jump_offset(), None); + } + + #[test] + fn test_jump_offset_mut_modifies_jump() { + let mut op = Opcode::NextIterOrJump { jump_back: -5 }; + if let Some(offset) = op.jump_offset_mut() { + *offset = -2; + } + assert_eq!(op.jump_offset(), Some(-2)); + } + #[test] fn test_opcode_size() { use std::mem::size_of; - // With array support opcodes (PushVarView has [DimId; 4] = 8 bytes), - // the opcode size increases. We accept up to 16 bytes. + // Large inline arrays ([DimId; 4]) moved to a side table, so + // the largest variant payload is now ViewRange (u8 + u16 + u16 = 5 bytes) + // or Lookup (u8 + u16 + u8 = 4 bytes). With discriminant, expect 8 bytes. 
let size = size_of::(); - assert!(size <= 16, "Opcode size {} exceeds 16 bytes", size); - // Print actual size for documentation + assert!(size <= 8, "Opcode size {} exceeds 8 bytes", size); eprintln!("Opcode size: {} bytes", size); } @@ -1677,6 +2376,572 @@ mod tests { // For scalar, always return offset assert_eq!(view.offset_for_iter_index(0), 5); } + + // ========================================================================= + // Peephole Optimizer Tests + // ========================================================================= + + #[test] + fn test_peephole_empty_bytecode() { + let mut bc = ByteCode { + code: vec![], + literals: vec![], + }; + bc.peephole_optimize(); + assert!(bc.code.is_empty()); + } + + #[test] + fn test_peephole_single_instruction() { + let mut bc = ByteCode { + code: vec![Opcode::Ret], + literals: vec![], + }; + bc.peephole_optimize(); + assert_eq!(bc.code.len(), 1); + assert!(matches!(bc.code[0], Opcode::Ret)); + } + + #[test] + fn test_peephole_no_fusible_patterns() { + let mut bc = ByteCode { + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Not {}, + Opcode::Ret, + ], + literals: vec![], + }; + bc.peephole_optimize(); + assert_eq!(bc.code.len(), 4); + assert!(matches!(bc.code[0], Opcode::LoadVar { off: 0 })); + assert!(matches!(bc.code[1], Opcode::LoadVar { off: 1 })); + assert!(matches!(bc.code[2], Opcode::Not {})); + assert!(matches!(bc.code[3], Opcode::Ret)); + } + + #[test] + fn test_peephole_load_constant_assign_curr_fusion() { + let mut bc = ByteCode { + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::AssignCurr { off: 5 }, + ], + literals: vec![42.0], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 1); + match &bc.code[0] { + Opcode::AssignConstCurr { off, literal_id } => { + assert_eq!(*off, 5); + assert_eq!(*literal_id, 0); + } + _ => panic!("expected AssignConstCurr"), + } + } + + #[test] + fn test_peephole_op2_assign_curr_fusion() { + let mut bc = ByteCode { + 
code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Op2 { op: Op2::Add }, + Opcode::AssignCurr { off: 2 }, + ], + literals: vec![], + }; + bc.peephole_optimize(); + + // LoadVar, LoadVar stay; Op2+AssignCurr fuse into BinOpAssignCurr + assert_eq!(bc.code.len(), 3); + assert!(matches!(bc.code[0], Opcode::LoadVar { off: 0 })); + assert!(matches!(bc.code[1], Opcode::LoadVar { off: 1 })); + match &bc.code[2] { + Opcode::BinOpAssignCurr { op, off } => { + assert!(matches!(op, Op2::Add)); + assert_eq!(*off, 2); + } + _ => panic!("expected BinOpAssignCurr"), + } + } + + #[test] + fn test_peephole_op2_assign_next_fusion() { + let mut bc = ByteCode { + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::Op2 { op: Op2::Mul }, + Opcode::AssignNext { off: 3 }, + ], + literals: vec![], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 3); + match &bc.code[2] { + Opcode::BinOpAssignNext { op, off } => { + assert!(matches!(op, Op2::Mul)); + assert_eq!(*off, 3); + } + _ => panic!("expected BinOpAssignNext"), + } + } + + #[test] + fn test_peephole_all_op2_variants_fuse() { + // Verify every Op2 variant can be fused with AssignCurr + let ops = [ + Op2::Add, + Op2::Sub, + Op2::Mul, + Op2::Div, + Op2::Exp, + Op2::Mod, + Op2::Gt, + Op2::Gte, + Op2::Lt, + Op2::Lte, + Op2::Eq, + Op2::And, + Op2::Or, + ]; + for op in ops { + let mut bc = ByteCode { + code: vec![Opcode::Op2 { op }, Opcode::AssignCurr { off: 10 }], + literals: vec![], + }; + bc.peephole_optimize(); + assert_eq!(bc.code.len(), 1, "failed for op variant"); + assert!(matches!(bc.code[0], Opcode::BinOpAssignCurr { .. 
})); + } + } + + #[test] + fn test_peephole_multiple_fusions() { + // Two independent fusion opportunities in sequence + let mut bc = ByteCode { + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::AssignCurr { off: 0 }, + Opcode::LoadVar { off: 1 }, + Opcode::LoadVar { off: 2 }, + Opcode::Op2 { op: Op2::Sub }, + Opcode::AssignCurr { off: 3 }, + ], + literals: vec![1.0], + }; + bc.peephole_optimize(); + + // LoadConstant+AssignCurr -> AssignConstCurr + // LoadVar, LoadVar stay + // Op2+AssignCurr -> BinOpAssignCurr + assert_eq!(bc.code.len(), 4); + assert!(matches!(bc.code[0], Opcode::AssignConstCurr { .. })); + assert!(matches!(bc.code[1], Opcode::LoadVar { off: 1 })); + assert!(matches!(bc.code[2], Opcode::LoadVar { off: 2 })); + assert!(matches!(bc.code[3], Opcode::BinOpAssignCurr { .. })); + } + + #[test] + fn test_peephole_mixed_fusible_and_nonfusible() { + let mut bc = ByteCode { + code: vec![ + Opcode::LoadVar { off: 0 }, + Opcode::Not {}, + Opcode::LoadConstant { id: 0 }, + Opcode::AssignCurr { off: 1 }, + Opcode::LoadVar { off: 2 }, + Opcode::Ret, + ], + literals: vec![0.0], + }; + bc.peephole_optimize(); + + // LoadVar, Not stay; LoadConstant+AssignCurr fuse; LoadVar, Ret stay + assert_eq!(bc.code.len(), 5); + assert!(matches!(bc.code[0], Opcode::LoadVar { off: 0 })); + assert!(matches!(bc.code[1], Opcode::Not {})); + assert!(matches!(bc.code[2], Opcode::AssignConstCurr { .. })); + assert!(matches!(bc.code[3], Opcode::LoadVar { off: 2 })); + assert!(matches!(bc.code[4], Opcode::Ret)); + } + + #[test] + fn test_peephole_jump_target_prevents_fusion() { + // If instruction i+1 is a jump target, don't fuse i with i+1. 
+ // Layout (before optimization):
+ // 0: LoadConstant { id: 0 } <- loop body start (jump target)
+ // 1: AssignCurr { off: 0 }
+ // 2: NextIterOrJump { jump_back: -2 } (target = 2 + (-2) = 0)
+ // 3: Ret
+ //
+ // At first glance instructions 0 and 1 look like they should be kept
+ // apart, since instruction 0 is a jump target. But the optimizer's guard
+ // is `!jump_targets[i + 1]`: fusing the pair (i, i+1) is blocked only when
+ // the SECOND instruction is a target, because the fused opcode replaces
+ // both instructions at i's PC, so jumps to i still land correctly.
+ //
+ // For i=0: jump_targets[1] is false, so fusion IS allowed.
+ // The jump target protection matters when the SECOND instruction of a
+ // potential pair is a jump target. Let's build that scenario:
+ //
+ // 0: Ret <- something before the loop
+ // 1: LoadVar { off: 5 } <- jump target (loop body start)
+ // 2: NextIterOrJump { jump_back: -1 } (target = 2 + (-1) = 1)
+ // 3: Ret
+ //
+ // For i=0 (Ret): can_fuse checks jump_targets[1] = true -> no fusion.
+ // This prevents fusing Ret with LoadVar, which is correct.
+ //
+ // A more realistic scenario: Op2 followed by AssignCurr where the
+ // AssignCurr is a jump target.
+ let mut bc = ByteCode {
+ code: vec![
+ Opcode::Op2 { op: Op2::Add }, // 0
+ Opcode::AssignCurr { off: 0 }, // 1 -- jump target
+ Opcode::NextIterOrJump { jump_back: -1 }, // 2 -> target = 2-1 = 1
+ Opcode::Ret, // 3
+ ],
+ literals: vec![],
+ };
+ bc.peephole_optimize();
+
+ // Fusion of 0+1 should be prevented because instruction 1 is a jump target
+ assert_eq!(bc.code.len(), 4);
+ assert!(matches!(bc.code[0], Opcode::Op2 { op: Op2::Add }));
+ assert!(matches!(bc.code[1], Opcode::AssignCurr { off: 0 }));
+ assert!(matches!(bc.code[2], Opcode::NextIterOrJump { ..
})); + assert!(matches!(bc.code[3], Opcode::Ret)); + } + + #[test] + fn test_peephole_jump_target_only_blocks_specific_pair() { + // Verify that a jump target only blocks fusion of the pair where + // the second instruction is the target, not other pairs. + // + // 0: LoadConstant { id: 0 } + // 1: AssignCurr { off: 0 } <- NOT a jump target, so 0+1 CAN fuse + // 2: LoadVar { off: 5 } <- jump target + // 3: NextIterOrJump { jump_back: -1 } (target = 3-1 = 2) + // 4: Ret + let mut bc = ByteCode { + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::AssignCurr { off: 0 }, + Opcode::LoadVar { off: 5 }, + Opcode::NextIterOrJump { jump_back: -1 }, + Opcode::Ret, + ], + literals: vec![1.0], + }; + bc.peephole_optimize(); + + // 0+1 should fuse (neither target), 2 stays (it's a jump target, but + // the previous instruction was AssignCurr which doesn't match any pattern + // anyway), 3 stays, 4 stays + assert_eq!(bc.code.len(), 4); + assert!(matches!( + bc.code[0], + Opcode::AssignConstCurr { + off: 0, + literal_id: 0 + } + )); + assert!(matches!(bc.code[1], Opcode::LoadVar { off: 5 })); + assert!(matches!(bc.code[2], Opcode::NextIterOrJump { .. })); + assert!(matches!(bc.code[3], Opcode::Ret)); + } + + #[test] + fn test_peephole_jump_offset_recalculation_next_iter() { + // When fusion shrinks the code, jump offsets must be recalculated. + // This test places a fusion BEFORE the loop (outside the jump target + // to jump instruction range) so the fixup works correctly. 
+ // + // Before optimization: + // 0: LoadConstant { id: 0 } \ + // 1: AssignCurr { off: 0 } / -> fuse + // 2: LoadVar { off: 1 } <- jump target + // 3: AssignCurr { off: 2 } + // 4: NextIterOrJump { jump_back: -2 } target = 4+(-2) = 2 + // 5: Ret + // + // After optimization: + // 0: AssignConstCurr (fused 0+1) + // 1: LoadVar { off: 1 } (jump target) + // 2: AssignCurr { off: 2 } + // 3: NextIterOrJump { jump_back: -2 } (loop body unchanged) + // 4: Ret + let mut bc = ByteCode { + code: vec![ + Opcode::LoadConstant { id: 0 }, // 0 + Opcode::AssignCurr { off: 0 }, // 1 + Opcode::LoadVar { off: 1 }, // 2 (jump target) + Opcode::AssignCurr { off: 2 }, // 3 + Opcode::NextIterOrJump { jump_back: -2 }, // 4, target=2 + Opcode::Ret, // 5 + ], + literals: vec![1.0], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 5); + assert!(matches!(bc.code[0], Opcode::AssignConstCurr { .. })); + assert!(matches!(bc.code[1], Opcode::LoadVar { off: 1 })); + assert!(matches!(bc.code[2], Opcode::AssignCurr { off: 2 })); + match &bc.code[3] { + Opcode::NextIterOrJump { jump_back } => { + assert_eq!(*jump_back, -2, "jump_back should remain -2"); + } + _ => panic!("expected NextIterOrJump"), + } + assert!(matches!(bc.code[4], Opcode::Ret)); + } + + #[test] + fn test_peephole_fusion_inside_loop_body() { + let mut bc = ByteCode { + code: vec![ + Opcode::LoadVar { off: 0 }, // 0 (jump target) + Opcode::Op2 { op: Op2::Add }, // 1 \ + Opcode::AssignCurr { off: 1 }, // 2 / fuse + Opcode::NextIterOrJump { jump_back: -3 }, // 3, target=0 + Opcode::Ret, // 4 + ], + literals: vec![], + }; + bc.peephole_optimize(); + + // 1+2 fuse -> BinOpAssignCurr + // Result: [LoadVar, BinOpAssignCurr, NextIterOrJump, Ret] + assert_eq!(bc.code.len(), 4); + assert!(matches!(bc.code[0], Opcode::LoadVar { off: 0 })); + assert!(matches!( + bc.code[1], + Opcode::BinOpAssignCurr { + op: Op2::Add, + off: 1 + } + )); + match &bc.code[2] { + Opcode::NextIterOrJump { jump_back } => { + // new PC 2, target 
should be new PC 0 -> jump_back = -2 + assert_eq!(*jump_back, -2); + } + other => panic!( + "expected NextIterOrJump, got {:?}", + std::mem::discriminant(other) + ), + } + assert!(matches!(bc.code[3], Opcode::Ret)); + } + + #[test] + fn test_peephole_jump_offset_recalculation_next_broadcast() { + // Same as above but with NextBroadcastOrJump + let mut bc = ByteCode { + code: vec![ + Opcode::LoadConstant { id: 0 }, // 0 + Opcode::AssignCurr { off: 0 }, // 1 + Opcode::LoadVar { off: 1 }, // 2 (jump target) + Opcode::NextBroadcastOrJump { jump_back: -1 }, // 3, target=2 + Opcode::Ret, // 4 + ], + literals: vec![1.0], + }; + bc.peephole_optimize(); + + // 0+1 fuse -> AssignConstCurr at new PC 0 + // 2 -> new PC 1 (jump target) + // 3 -> new PC 2 + // 4 -> new PC 3 + assert_eq!(bc.code.len(), 4); + assert!(matches!(bc.code[0], Opcode::AssignConstCurr { .. })); + assert!(matches!(bc.code[1], Opcode::LoadVar { off: 1 })); + match &bc.code[2] { + Opcode::NextBroadcastOrJump { jump_back } => { + // new PC 2, target should be new PC 1 + assert_eq!(*jump_back, -1, "jump_back should be -1"); + } + _ => panic!("expected NextBroadcastOrJump"), + } + assert!(matches!(bc.code[3], Opcode::Ret)); + } + + #[test] + fn test_peephole_no_fusion_when_patterns_dont_match() { + // Op2 followed by something other than AssignCurr/AssignNext + let mut bc = ByteCode { + code: vec![Opcode::Op2 { op: Op2::Add }, Opcode::Not {}, Opcode::Ret], + literals: vec![], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 3); + assert!(matches!(bc.code[0], Opcode::Op2 { op: Op2::Add })); + assert!(matches!(bc.code[1], Opcode::Not {})); + } + + #[test] + fn test_peephole_load_constant_not_followed_by_assign_curr() { + // LoadConstant not followed by AssignCurr should not fuse + let mut bc = ByteCode { + code: vec![Opcode::LoadConstant { id: 0 }, Opcode::Not {}, Opcode::Ret], + literals: vec![1.0], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 3); + assert!(matches!(bc.code[0], 
Opcode::LoadConstant { id: 0 })); + } + + #[test] + fn test_peephole_via_builder() { + // Verify that ByteCodeBuilder::finish() runs peephole_optimize + let mut builder = ByteCodeBuilder::default(); + let lit_id = builder.intern_literal(3.14); + builder.push_opcode(Opcode::LoadConstant { id: lit_id }); + builder.push_opcode(Opcode::AssignCurr { off: 7 }); + builder.push_opcode(Opcode::Ret); + + let bc = builder.finish(); + assert_eq!(bc.code.len(), 2); + match &bc.code[0] { + Opcode::AssignConstCurr { off, literal_id } => { + assert_eq!(*off, 7); + assert_eq!(*literal_id, lit_id); + } + _ => panic!("expected AssignConstCurr after builder finish"), + } + assert!(matches!(bc.code[1], Opcode::Ret)); + } + + #[test] + fn test_peephole_consecutive_fusions_chain() { + // Three consecutive fusible pairs + let mut bc = ByteCode { + code: vec![ + Opcode::LoadConstant { id: 0 }, + Opcode::AssignCurr { off: 0 }, + Opcode::LoadConstant { id: 1 }, + Opcode::AssignCurr { off: 1 }, + Opcode::Op2 { op: Op2::Div }, + Opcode::AssignNext { off: 2 }, + ], + literals: vec![1.0, 2.0], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 3); + assert!(matches!( + bc.code[0], + Opcode::AssignConstCurr { + off: 0, + literal_id: 0 + } + )); + assert!(matches!( + bc.code[1], + Opcode::AssignConstCurr { + off: 1, + literal_id: 1 + } + )); + match &bc.code[2] { + Opcode::BinOpAssignNext { op, off } => { + assert!(matches!(op, Op2::Div)); + assert_eq!(*off, 2); + } + _ => panic!("expected BinOpAssignNext"), + } + } + + #[test] + fn test_peephole_last_instruction_not_fused_alone() { + // If the fusible first instruction is the very last one, no fusion happens + let mut bc = ByteCode { + code: vec![Opcode::Ret, Opcode::LoadConstant { id: 0 }], + literals: vec![1.0], + }; + bc.peephole_optimize(); + + assert_eq!(bc.code.len(), 2); + assert!(matches!(bc.code[0], Opcode::Ret)); + assert!(matches!(bc.code[1], Opcode::LoadConstant { id: 0 })); + } + + // 
========================================================================= + // DimList Side Table Tests + // ========================================================================= + + #[test] + fn test_dim_list_add_and_get() { + let mut ctx = ByteCodeContext::default(); + + let id = ctx.add_dim_list(2, [10, 20, 0, 0]); + assert_eq!(id, 0); + + let (n_dims, ids) = ctx.get_dim_list(id); + assert_eq!(n_dims, 2); + assert_eq!(ids[0], 10); + assert_eq!(ids[1], 20); + } + + #[test] + fn test_dim_list_multiple_entries() { + let mut ctx = ByteCodeContext::default(); + + let id0 = ctx.add_dim_list(1, [5, 0, 0, 0]); + let id1 = ctx.add_dim_list(3, [1, 2, 3, 0]); + let id2 = ctx.add_dim_list(4, [10, 20, 30, 40]); + + assert_eq!(id0, 0); + assert_eq!(id1, 1); + assert_eq!(id2, 2); + + let (n, ids) = ctx.get_dim_list(id0); + assert_eq!(n, 1); + assert_eq!(ids[0], 5); + + let (n, ids) = ctx.get_dim_list(id1); + assert_eq!(n, 3); + assert_eq!(&ids[..3], &[1, 2, 3]); + + let (n, ids) = ctx.get_dim_list(id2); + assert_eq!(n, 4); + assert_eq!(ids, &[10, 20, 30, 40]); + } + + #[test] + fn test_dim_list_zero_dims() { + let mut ctx = ByteCodeContext::default(); + + let id = ctx.add_dim_list(0, [0, 0, 0, 0]); + let (n_dims, _ids) = ctx.get_dim_list(id); + assert_eq!(n_dims, 0); + } + + #[test] + fn test_dim_list_incremental_ids() { + let mut ctx = ByteCodeContext::default(); + + // Add several entries and verify IDs are sequential + for i in 0..10u16 { + let id = ctx.add_dim_list(1, [i, 0, 0, 0]); + assert_eq!(id, i, "dim list IDs should be assigned sequentially"); + } + + // Verify all entries are still retrievable + for i in 0..10u16 { + let (n, ids) = ctx.get_dim_list(i); + assert_eq!(n, 1); + assert_eq!(ids[0], i); + } + } } /// A single variable's compiled initial-value bytecode, along with the diff --git a/src/simlin-engine/src/compiler.rs b/src/simlin-engine/src/compiler.rs index 67e80e5c..9dcc139f 100644 --- a/src/simlin-engine/src/compiler.rs +++ 
b/src/simlin-engine/src/compiler.rs @@ -11,9 +11,9 @@ use crate::ast::{ }; use crate::bytecode::{ BuiltinId, ByteCode, ByteCodeBuilder, ByteCodeContext, CompiledInitial, CompiledModule, DimId, - DimensionInfo, GraphicalFunctionId, LookupMode, ModuleDeclaration, ModuleId, ModuleInputOffset, - NameId, Op2, Opcode, RuntimeSparseMapping, StaticArrayView, SubdimensionRelation, TempId, - VariableOffset, ViewId, + DimListId, DimensionInfo, GraphicalFunctionId, LookupMode, ModuleDeclaration, ModuleId, + ModuleInputOffset, NameId, Op2, Opcode, RuntimeSparseMapping, StaticArrayView, + SubdimensionRelation, TempId, VariableOffset, ViewId, }; use crate::common::{ Canonical, CanonicalElementName, ErrorCode, ErrorKind, Ident, Result, canonicalize, @@ -3734,6 +3734,7 @@ struct Compiler<'module> { subdim_relations: Vec, names: Vec, static_views: Vec, + dim_lists: Vec<(u8, [u16; 4])>, // Iteration context - set when compiling inside AssignTemp in_iteration: bool, /// When in optimized iteration mode, maps pre-pushed views to their stack offset. @@ -3766,6 +3767,7 @@ impl<'module> Compiler<'module> { subdim_relations: vec![], names: vec![], static_views: vec![], + dim_lists: Vec::new(), in_iteration: false, iter_source_views: None, }; @@ -4009,10 +4011,11 @@ impl<'module> Compiler<'module> { for (i, &bound) in bounds.iter().take(4).enumerate() { dims[i] = bound as u16; } + let dim_list_id = self.dim_lists.len() as DimListId; + self.dim_lists.push((n_dims, dims)); self.push(Opcode::PushVarViewDirect { base_off: *off as u16, - n_dims, - dims, + dim_list_id, }); // Apply each subscript index to the view. 
@@ -4950,6 +4953,7 @@ impl<'module> Compiler<'module> { static_views: self.static_views, temp_offsets, temp_total_size, + dim_lists: self.dim_lists, }), compiled_initials, compiled_flows, diff --git a/src/simlin-engine/src/lib.rs b/src/simlin-engine/src/lib.rs index f7ba094f..9a0dc280 100644 --- a/src/simlin-engine/src/lib.rs +++ b/src/simlin-engine/src/lib.rs @@ -2,7 +2,13 @@ // Use of this source code is governed by the Apache License, // Version 2.0, that can be found in the LICENSE file. -#![forbid(unsafe_code)] +// deny (not forbid) because vm.rs Stack needs a targeted #[allow(unsafe_code)] +// for unchecked array access in the hot dispatch loop. Rust's forbid() cannot +// be overridden by inner #[allow] attributes (even in submodules), so deny() +// is the strongest level that still permits a single opt-in. The unsafe stack +// access is proven safe by ByteCodeBuilder::finish(), which statically validates +// that compiled bytecode cannot exceed STACK_CAPACITY. +#![deny(unsafe_code)] pub use prost; diff --git a/src/simlin-engine/src/vm.rs b/src/simlin-engine/src/vm.rs index aa8ddcd3..2c9d2bf1 100644 --- a/src/simlin-engine/src/vm.rs +++ b/src/simlin-engine/src/vm.rs @@ -10,7 +10,7 @@ use smallvec::SmallVec; use crate::bytecode::{ BuiltinId, ByteCode, ByteCodeContext, CompiledInitial, CompiledModule, DimId, LookupMode, - ModuleId, Op2, Opcode, RuntimeView, TempId, + ModuleId, Op2, Opcode, RuntimeView, STACK_CAPACITY, TempId, }; use crate::common::{Canonical, Ident, Result}; use crate::dimensions::{Dimension, match_dimensions_two_pass}; @@ -93,6 +93,25 @@ pub(crate) fn is_truthy(n: f64) -> bool { !is_false } +#[inline(always)] +fn eval_op2(op: Op2, l: f64, r: f64) -> f64 { + match op { + Op2::Add => l + r, + Op2::Sub => l - r, + Op2::Exp => l.powf(r), + Op2::Mul => l * r, + Op2::Div => l / r, + Op2::Mod => l.rem_euclid(r), + Op2::Gt => (l > r) as i8 as f64, + Op2::Gte => (l >= r) as i8 as f64, + Op2::Lt => (l < r) as i8 as f64, + Op2::Lte => (l <= r) as i8 as 
f64, + Op2::Eq => approx_eq!(f64, l, r) as i8 as f64, + Op2::And => (is_truthy(l) && is_truthy(r)) as i8 as f64, + Op2::Or => (is_truthy(l) || is_truthy(r)) as i8 as f64, + } +} + #[cfg_attr(feature = "debug-derive", derive(Debug))] #[derive(Clone)] pub struct CompiledSimulation { @@ -203,28 +222,77 @@ pub struct Vm { initial_offsets: HashSet, } -#[cfg_attr(feature = "debug-derive", derive(Debug))] #[derive(Clone)] struct Stack { - stack: Vec, + data: [f64; STACK_CAPACITY], + top: usize, +} + +#[cfg(feature = "debug-derive")] +impl std::fmt::Debug for Stack { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Stack") + .field("top", &self.top) + .field("data", &&self.data[..self.top]) + .finish() + } } +#[allow(unsafe_code)] impl Stack { fn new() -> Self { Stack { - stack: Vec::with_capacity(32), + data: [0.0; STACK_CAPACITY], + top: 0, } } #[inline(always)] fn push(&mut self, value: f64) { - self.stack.push(value) + debug_assert!(self.top < STACK_CAPACITY, "stack overflow"); + // SAFETY: ByteCodeBuilder::finish() statically validates that the max + // stack depth of all compiled bytecode is < STACK_CAPACITY, so this + // bound cannot be exceeded at runtime. The debug_assert serves as a + // belt-and-suspenders check during development. + unsafe { + *self.data.get_unchecked_mut(self.top) = value; + } + self.top += 1; } #[inline(always)] fn pop(&mut self) -> f64 { - self.stack.pop().unwrap() + debug_assert!(self.top > 0, "stack underflow"); + self.top -= 1; + // SAFETY: ByteCodeBuilder::finish() validates via checked_sub that no + // opcode sequence pops more values than have been pushed (i.e. the stack + // depth never goes negative). This guarantees top > 0 before every pop + // at runtime. The debug_assert is a belt-and-suspenders check. 
+ unsafe { *self.data.get_unchecked(self.top) } + } + #[inline(always)] + fn len(&self) -> usize { + self.top + } + #[inline(always)] + fn clear(&mut self) { + self.top = 0; } } +/// Mutable evaluation state grouped into a single struct to reduce argument +/// count in eval functions (was 11-14 args, now 6-10). In `eval_bytecode`, +/// the fields are destructured into local reborrows for ergonomic access; +/// for recursive `EvalModule` calls they must be re-packed into a temporary +/// `EvalState` because the borrow checker cannot split the struct across the +/// call boundary. +#[cfg_attr(feature = "debug-derive", derive(Debug))] +struct EvalState<'a> { + stack: &'a mut Stack, + temp_storage: &'a mut [f64], + view_stack: &'a mut Vec, + iter_stack: &'a mut Vec, + broadcast_stack: &'a mut Vec, +} + #[cfg_attr(feature = "debug-derive", derive(Debug))] #[derive(Clone)] struct CompiledModuleSlice { @@ -364,11 +432,9 @@ impl Vm { let save_every = std::cmp::max(1, (save_step / dt + 0.5).floor() as usize); - self.stack.stack.clear(); + self.stack.clear(); let module_inputs: &[f64] = &[0.0; 0]; - let mut data = None; - std::mem::swap(&mut data, &mut self.data); - let mut data = data.unwrap(); + let mut data = self.data.take().unwrap(); let module_flows = &self.sliced_sim.flow_modules[&self.root]; let module_stocks = &self.sliced_sim.stock_modules[&self.root]; @@ -377,6 +443,14 @@ impl Vm { self.iter_stack.clear(); self.broadcast_stack.clear(); + let mut state = EvalState { + stack: &mut self.stack, + temp_storage: &mut self.temp_storage, + view_stack: &mut self.view_stack, + iter_stack: &mut self.iter_stack, + broadcast_stack: &mut self.broadcast_stack, + }; + loop { let (curr, next) = borrow_two(&mut data, n_slots, self.curr_chunk, self.next_chunk); if curr[TIME_OFF] > end { @@ -385,34 +459,25 @@ impl Vm { Self::eval( &self.sliced_sim, - &mut self.temp_storage, + &mut state, module_flows, 0, module_inputs, curr, next, - &mut self.stack, - &mut self.view_stack, - &mut 
self.iter_stack, - &mut self.broadcast_stack, ); Self::eval( &self.sliced_sim, - &mut self.temp_storage, + &mut state, module_stocks, 0, module_inputs, curr, next, - &mut self.stack, - &mut self.view_stack, - &mut self.iter_stack, - &mut self.broadcast_stack, ); + // Only TIME changes per step; DT, INITIAL_TIME, FINAL_TIME are + // invariant and already set in every chunk slot during initials. next[TIME_OFF] = curr[TIME_OFF] + dt; - next[DT_OFF] = curr[DT_OFF]; - next[INITIAL_TIME_OFF] = curr[INITIAL_TIME_OFF]; - next[FINAL_TIME_OFF] = curr[FINAL_TIME_OFF]; self.step_accum += 1; let is_initial_timestep = (self.curr_chunk == 0) && (curr[TIME_OFF] == spec_start); @@ -447,14 +512,20 @@ impl Vm { pub fn set_value_now(&mut self, off: usize, val: f64) { let start = self.curr_chunk * self.n_slots; - let mut data = None; - std::mem::swap(&mut data, &mut self.data); - let mut data = data.unwrap(); + let data = self.data.as_mut().unwrap(); data[start + off] = val; - self.data = Some(data); } + /// Read the current value of a variable by its data buffer offset. + /// + /// Precondition: `run_initials()` must have been called since the last + /// `reset()`. After `reset()` but before `run_initials()`, the data buffer + /// may contain stale values from the previous simulation run. pub fn get_value_now(&self, off: usize) -> f64 { + debug_assert!( + self.did_initials, + "get_value_now called before run_initials; data buffer may contain stale values" + ); let start = self.curr_chunk * self.n_slots; self.data.as_ref().unwrap()[start + off] } @@ -478,16 +549,19 @@ impl Vm { /// Reset the VM to its pre-simulation state, reusing the data buffer allocation. /// Overrides are preserved across reset. + /// + /// The data buffer is NOT zeroed here because `run_initials()` fully + /// reinitializes all variable slots and pre-fills DT/INITIAL_TIME/FINAL_TIME + /// across all chunk slots. 
The `did_initials` flag (reset to false here) + /// prevents `run_to()` from executing on stale data -- it returns early + /// if `run_initials()` has not been called since the last reset. pub fn reset(&mut self) { - if let Some(ref mut data) = self.data { - data.fill(0.0); - } self.curr_chunk = 0; self.next_chunk = 1; self.did_initials = false; self.step_accum = 0; self.temp_storage.fill(0.0); - self.stack.stack.clear(); + self.stack.clear(); self.view_stack.clear(); self.iter_stack.clear(); self.broadcast_stack.clear(); @@ -552,11 +626,9 @@ impl Vm { let spec_stop = self.specs.stop; let dt = self.specs.dt; - self.stack.stack.clear(); + self.stack.clear(); let module_inputs: &[f64] = &[0.0; 0]; - let mut data = None; - std::mem::swap(&mut data, &mut self.data); - let mut data = data.unwrap(); + let mut data = self.data.take().unwrap(); let (curr, next) = borrow_two(&mut data, self.n_slots, self.curr_chunk, self.next_chunk); curr[TIME_OFF] = spec_start; @@ -568,20 +640,36 @@ impl Vm { self.iter_stack.clear(); self.broadcast_stack.clear(); + let mut state = EvalState { + stack: &mut self.stack, + temp_storage: &mut self.temp_storage, + view_stack: &mut self.view_stack, + iter_stack: &mut self.iter_stack, + broadcast_stack: &mut self.broadcast_stack, + }; + Self::eval_initials_with_overrides( &self.sliced_sim, - &mut self.temp_storage, + &mut state, &self.root, 0, module_inputs, curr, next, - &mut self.stack, &self.overrides, - &mut self.view_stack, - &mut self.iter_stack, - &mut self.broadcast_stack, ); + + // Pre-fill DT, INITIAL_TIME, and FINAL_TIME across all chunk slots so + // run_to only needs to advance TIME per step. 
+ let n_slots = self.n_slots; + let total_chunks = self.n_chunks + 2; + for chunk in 0..total_chunks { + let base = chunk * n_slots; + data[base + DT_OFF] = dt; + data[base + INITIAL_TIME_OFF] = spec_start; + data[base + FINAL_TIME_OFF] = spec_stop; + } + self.did_initials = true; self.step_accum = 0; @@ -620,18 +708,14 @@ impl Vm { #[inline(never)] fn eval_module_initials_with_overrides( sliced_sim: &CompiledSlicedSimulation, - temp_storage: &mut [f64], + state: &mut EvalState<'_>, parent_context: &ByteCodeContext, parent_module_off: usize, module_inputs: &[f64], curr: &mut [f64], next: &mut [f64], - stack: &mut Stack, id: ModuleId, overrides: &HashMap, - view_stack: &mut Vec, - iter_stack: &mut Vec, - broadcast_stack: &mut Vec, ) { let new_module_decl = &parent_context.modules[id as usize]; let module_key = make_module_key(&new_module_decl.model_name, &new_module_decl.input_set); @@ -639,17 +723,13 @@ impl Vm { Self::eval_initials_with_overrides( sliced_sim, - temp_storage, + state, &module_key, module_off, module_inputs, curr, next, - stack, overrides, - view_stack, - iter_stack, - broadcast_stack, ); } @@ -658,39 +738,28 @@ impl Vm { #[allow(clippy::too_many_arguments)] fn eval_initials_with_overrides( sliced_sim: &CompiledSlicedSimulation, - temp_storage: &mut [f64], + state: &mut EvalState<'_>, module_key: &ModuleKey, module_off: usize, module_inputs: &[f64], curr: &mut [f64], next: &mut [f64], - stack: &mut Stack, overrides: &HashMap, - view_stack: &mut Vec, - iter_stack: &mut Vec, - broadcast_stack: &mut Vec, ) { let module_initials = &sliced_sim.initial_modules[module_key]; let context = &module_initials.context; for compiled_initial in module_initials.initials.iter() { Self::eval_single_initial( sliced_sim, - temp_storage, + state, context, &compiled_initial.bytecode, module_off, module_inputs, curr, next, - stack, overrides, - view_stack, - iter_stack, - broadcast_stack, ); - // Evaluate-then-patch: apply overrides after bytecode completes. 
- // CompiledInitial offsets are module-relative; add module_off - // to get the absolute position in the flattened data buffer. for &off in &compiled_initial.offsets { let abs_off = module_off + off; if let Some(&val) = overrides.get(&abs_off) { @@ -704,22 +773,18 @@ impl Vm { #[allow(clippy::too_many_arguments)] fn eval_single_initial( sliced_sim: &CompiledSlicedSimulation, - temp_storage: &mut [f64], + state: &mut EvalState<'_>, context: &ByteCodeContext, bytecode: &ByteCode, module_off: usize, module_inputs: &[f64], curr: &mut [f64], next: &mut [f64], - stack: &mut Stack, overrides: &HashMap, - view_stack: &mut Vec, - iter_stack: &mut Vec, - broadcast_stack: &mut Vec, ) { Self::eval_bytecode( sliced_sim, - temp_storage, + state, context, bytecode, StepPart::Initials, @@ -727,31 +792,23 @@ impl Vm { module_inputs, curr, next, - stack, overrides, - view_stack, - iter_stack, - broadcast_stack, ); } - #[allow(clippy::too_many_arguments)] + #[inline(always)] fn eval( sliced_sim: &CompiledSlicedSimulation, - temp_storage: &mut [f64], + state: &mut EvalState<'_>, module: &CompiledModuleSlice, module_off: usize, module_inputs: &[f64], curr: &mut [f64], next: &mut [f64], - stack: &mut Stack, - view_stack: &mut Vec, - iter_stack: &mut Vec, - broadcast_stack: &mut Vec, ) { Self::eval_bytecode( sliced_sim, - temp_storage, + state, &module.context, &module.bytecode, module.part, @@ -759,18 +816,14 @@ impl Vm { module_inputs, curr, next, - stack, &EMPTY_OVERRIDES, - view_stack, - iter_stack, - broadcast_stack, ); } #[allow(clippy::too_many_arguments)] fn eval_bytecode( sliced_sim: &CompiledSlicedSimulation, - temp_storage: &mut [f64], + state: &mut EvalState<'_>, context: &ByteCodeContext, bytecode: &ByteCode, part: StepPart, @@ -778,13 +831,18 @@ impl Vm { module_inputs: &[f64], curr: &mut [f64], next: &mut [f64], - stack: &mut Stack, overrides: &HashMap, - view_stack: &mut Vec, - iter_stack: &mut Vec, - broadcast_stack: &mut Vec, ) { - // Existing state + // Destructure 
EvalState into local reborrows so the opcode loop can use + // them directly. For recursive EvalModule calls we must re-pack into a + // temporary EvalState (and destructure again afterward) because holding + // individual &mut borrows from the struct would prevent passing &mut EvalState. + let mut stack = &mut *state.stack; + let mut temp_storage = &mut *state.temp_storage; + let mut view_stack = &mut *state.view_stack; + let mut iter_stack = &mut *state.iter_stack; + let mut broadcast_stack = &mut *state.broadcast_stack; + let mut condition = false; let mut subscript_index: SmallVec<[(u16, u16); 4]> = SmallVec::new(); let mut subscript_index_valid = true; @@ -798,22 +856,7 @@ impl Vm { Opcode::Op2 { op } => { let r = stack.pop(); let l = stack.pop(); - let result = match op { - Op2::Add => l + r, - Op2::Sub => l - r, - Op2::Exp => l.powf(r), - Op2::Mul => l * r, - Op2::Div => l / r, - Op2::Mod => l.rem_euclid(r), - Op2::Gt => (l > r) as i8 as f64, - Op2::Gte => (l >= r) as i8 as f64, - Op2::Lt => (l < r) as i8 as f64, - Op2::Lte => (l <= r) as i8 as f64, - Op2::Eq => approx_eq!(f64, l, r) as i8 as f64, - Op2::And => (is_truthy(l) && is_truthy(r)) as i8 as f64, - Op2::Or => (is_truthy(l) || is_truthy(r)) as i8 as f64, - }; - stack.push(result); + stack.push(eval_op2(*op, l, r)); } Opcode::Not {} => { let r = stack.pop(); @@ -873,22 +916,25 @@ impl Vm { for j in (0..(*n_inputs as usize)).rev() { module_inputs[j] = stack.pop(); } + let mut child_state = EvalState { + stack, + temp_storage, + view_stack, + iter_stack, + broadcast_stack, + }; match part { StepPart::Initials => { Self::eval_module_initials_with_overrides( sliced_sim, - temp_storage, + &mut child_state, context, module_off, &module_inputs, curr, next, - stack, *id, overrides, - view_stack, - iter_stack, - broadcast_stack, ); } StepPart::Flows | StepPart::Stocks => { @@ -905,27 +951,53 @@ impl Vm { }; Self::eval( sliced_sim, - temp_storage, + &mut child_state, child_module, child_module_off, 
&module_inputs, curr, next, - stack, - view_stack, - iter_stack, - broadcast_stack, ); } } + // Recover mutable references from child_state + let EvalState { + stack: s, + temp_storage: ts, + view_stack: vs, + iter_stack: is_, + broadcast_stack: bs, + } = child_state; + stack = s; + temp_storage = ts; + view_stack = vs; + iter_stack = is_; + broadcast_stack = bs; } Opcode::AssignCurr { off } => { curr[module_off + *off as usize] = stack.pop(); - assert_eq!(0, stack.stack.len()); + debug_assert_eq!(0, stack.len()); } Opcode::AssignNext { off } => { next[module_off + *off as usize] = stack.pop(); - assert_eq!(0, stack.stack.len()); + debug_assert_eq!(0, stack.len()); + } + // === SUPERINSTRUCTIONS === + Opcode::AssignConstCurr { off, literal_id } => { + curr[module_off + *off as usize] = bytecode.literals[*literal_id as usize]; + debug_assert_eq!(0, stack.len()); + } + Opcode::BinOpAssignCurr { op, off } => { + let r = stack.pop(); + let l = stack.pop(); + curr[module_off + *off as usize] = eval_op2(*op, l, r); + debug_assert_eq!(0, stack.len()); + } + Opcode::BinOpAssignNext { op, off } => { + let r = stack.pop(); + let l = stack.pop(); + next[module_off + *off as usize] = eval_op2(*op, l, r); + debug_assert_eq!(0, stack.len()); } Opcode::Apply { func } => { let time = curr[TIME_OFF]; @@ -967,11 +1039,10 @@ impl Vm { // ========================================================= Opcode::PushVarView { base_off, - n_dims, - dim_ids, + dim_list_id, } => { - // Build a view for a variable with given dimensions - let n = *n_dims as usize; + let (n_dims, dim_ids) = context.get_dim_list(*dim_list_id); + let n = n_dims as usize; let dims: SmallVec<[u16; 4]> = (0..n) .map(|i| context.dimensions[dim_ids[i] as usize].size) .collect(); @@ -986,10 +1057,10 @@ impl Vm { Opcode::PushTempView { temp_id, - n_dims, - dim_ids, + dim_list_id, } => { - let n = *n_dims as usize; + let (n_dims, dim_ids) = context.get_dim_list(*dim_list_id); + let n = n_dims as usize; let dims: 
SmallVec<[u16; 4]> = (0..n) .map(|i| context.dimensions[dim_ids[i] as usize].size) .collect(); @@ -1005,13 +1076,11 @@ impl Vm { Opcode::PushVarViewDirect { base_off, - n_dims, - dims, + dim_list_id, } => { - // Build a view with explicit dimension sizes (no dim_id lookup needed) - let n = *n_dims as usize; + let (n_dims, dims) = context.get_dim_list(*dim_list_id); + let n = n_dims as usize; let dims_vec: SmallVec<[u16; 4]> = dims[..n].iter().copied().collect(); - // Use 0 as dim_id since we don't have dimension metadata let dim_ids: SmallVec<[DimId; 4]> = (0..n).map(|_| 0 as DimId).collect(); let view = RuntimeView::for_var( (module_off + *base_off as usize) as u32, @@ -2190,6 +2259,40 @@ fn lookup_backward(table: &[(f64, f64)], index: f64) -> f64 { table[low - 1].1 } +#[cfg(test)] +mod eval_op2_tests { + use super::*; + + #[test] + fn test_eval_op2_arithmetic() { + assert_eq!(eval_op2(Op2::Add, 3.0, 4.0), 7.0); + assert_eq!(eval_op2(Op2::Sub, 10.0, 3.0), 7.0); + assert_eq!(eval_op2(Op2::Mul, 3.0, 4.0), 12.0); + assert_eq!(eval_op2(Op2::Div, 10.0, 4.0), 2.5); + assert_eq!(eval_op2(Op2::Exp, 2.0, 3.0), 8.0); + assert_eq!(eval_op2(Op2::Mod, 7.0, 3.0), 1.0); + } + + #[test] + fn test_eval_op2_comparisons() { + assert_eq!(eval_op2(Op2::Gt, 5.0, 3.0), 1.0); + assert_eq!(eval_op2(Op2::Gt, 3.0, 5.0), 0.0); + assert_eq!(eval_op2(Op2::Gte, 5.0, 5.0), 1.0); + assert_eq!(eval_op2(Op2::Lt, 3.0, 5.0), 1.0); + assert_eq!(eval_op2(Op2::Lte, 5.0, 5.0), 1.0); + assert_eq!(eval_op2(Op2::Eq, 5.0, 5.0), 1.0); + assert_eq!(eval_op2(Op2::Eq, 5.0, 5.1), 0.0); + } + + #[test] + fn test_eval_op2_logical() { + assert_eq!(eval_op2(Op2::And, 1.0, 1.0), 1.0); + assert_eq!(eval_op2(Op2::And, 1.0, 0.0), 0.0); + assert_eq!(eval_op2(Op2::Or, 0.0, 1.0), 1.0); + assert_eq!(eval_op2(Op2::Or, 0.0, 0.0), 0.0); + } +} + #[cfg(test)] mod lookup_tests { use super::*; @@ -3101,3 +3204,1312 @@ mod override_tests { ); } } + +#[cfg(test)] +mod stack_tests { + use super::*; + + #[test] + fn 
test_push_pop_basic() { + let mut s = Stack::new(); + s.push(1.0); + s.push(2.0); + s.push(3.0); + assert_eq!(3.0, s.pop()); + assert_eq!(2.0, s.pop()); + assert_eq!(1.0, s.pop()); + } + + #[test] + fn test_lifo_ordering() { + let mut s = Stack::new(); + for i in 0..10 { + s.push(i as f64); + } + for i in (0..10).rev() { + assert_eq!(i as f64, s.pop()); + } + } + + #[test] + fn test_clear_resets_stack() { + let mut s = Stack::new(); + s.push(1.0); + s.push(2.0); + assert_eq!(2, s.len()); + s.clear(); + assert_eq!(0, s.len()); + } + + #[test] + fn test_len_tracks_size() { + let mut s = Stack::new(); + assert_eq!(0, s.len()); + s.push(10.0); + assert_eq!(1, s.len()); + s.push(20.0); + assert_eq!(2, s.len()); + s.pop(); + assert_eq!(1, s.len()); + s.pop(); + assert_eq!(0, s.len()); + } + + #[test] + fn test_full_capacity() { + let mut s = Stack::new(); + for i in 0..STACK_CAPACITY { + s.push(i as f64); + } + assert_eq!(STACK_CAPACITY, s.len()); + for i in (0..STACK_CAPACITY).rev() { + assert_eq!(i as f64, s.pop()); + } + assert_eq!(0, s.len()); + } + + #[test] + fn test_interleaved_push_pop() { + let mut s = Stack::new(); + s.push(1.0); + s.push(2.0); + assert_eq!(2.0, s.pop()); + s.push(3.0); + s.push(4.0); + assert_eq!(4.0, s.pop()); + assert_eq!(3.0, s.pop()); + assert_eq!(1.0, s.pop()); + assert_eq!(0, s.len()); + } + + #[test] + fn test_push_after_clear() { + let mut s = Stack::new(); + s.push(1.0); + s.push(2.0); + s.clear(); + s.push(42.0); + assert_eq!(1, s.len()); + assert_eq!(42.0, s.pop()); + } + + #[test] + fn test_negative_and_special_values() { + let mut s = Stack::new(); + s.push(-1.0); + s.push(0.0); + s.push(f64::INFINITY); + s.push(f64::NEG_INFINITY); + s.push(f64::NAN); + assert!(s.pop().is_nan()); + assert_eq!(f64::NEG_INFINITY, s.pop()); + assert_eq!(f64::INFINITY, s.pop()); + assert_eq!(0.0, s.pop()); + assert_eq!(-1.0, s.pop()); + } +} + +#[cfg(test)] +mod superinstruction_tests { + use super::*; + use crate::bytecode::Opcode; + use 
crate::test_common::TestProject; + + fn build_vm(tp: &TestProject) -> Vm { + let sim = tp.build_sim().unwrap(); + let compiled = sim.compile().unwrap(); + Vm::new(compiled).unwrap() + } + + /// Helper: collect all opcodes from the flow bytecode of the root module. + fn flow_opcodes(vm: &Vm) -> Vec<&Opcode> { + let bc = &vm.sliced_sim.flow_modules[&vm.root].bytecode; + bc.code.iter().collect() + } + + /// Helper: collect all opcodes from the stock bytecode of the root module. + fn stock_opcodes(vm: &Vm) -> Vec<&Opcode> { + let bc = &vm.sliced_sim.stock_modules[&vm.root].bytecode; + bc.code.iter().collect() + } + + // ----------------------------------------------------------------------- + // AssignConstCurr: a constant aux like `birth_rate = 0.1` + // ----------------------------------------------------------------------- + + #[test] + fn test_assign_const_curr_present_in_bytecode() { + let tp = TestProject::new("const_model") + .with_sim_time(0.0, 1.0, 1.0) + .aux("rate", "0.1", None) + .flow("inflow", "pop * rate", None) + .stock("pop", "100", &["inflow"], &[], None); + + let vm = build_vm(&tp); + let ops = flow_opcodes(&vm); + let has_assign_const = ops + .iter() + .any(|op| matches!(op, Opcode::AssignConstCurr { .. 
})); + assert!( + has_assign_const, + "constant aux should produce AssignConstCurr in flow bytecode" + ); + } + + #[test] + fn test_assign_const_curr_simulation_result() { + let tp = TestProject::new("const_sim") + .with_sim_time(0.0, 2.0, 1.0) + .aux("rate", "0.1", None) + .flow("inflow", "pop * rate", None) + .stock("pop", "100", &["inflow"], &[], None); + + let vm_results = tp.run_vm().unwrap(); + let interp_results = tp.run_interpreter().unwrap(); + + let vm_rate = &vm_results["rate"]; + let interp_rate = &interp_results["rate"]; + for (i, (v, e)) in vm_rate.iter().zip(interp_rate.iter()).enumerate() { + assert!( + (v - e).abs() < 1e-10, + "rate mismatch at step {i}: vm={v}, interp={e}" + ); + } + } + + // ----------------------------------------------------------------------- + // BinOpAssignCurr: e.g. `births = population * birth_rate` + // ----------------------------------------------------------------------- + + #[test] + fn test_binop_assign_curr_present_in_bytecode() { + let tp = TestProject::new("binop_model") + .with_sim_time(0.0, 1.0, 1.0) + .aux("rate", "0.1", None) + .aux("result", "rate * 2", None) + .flow("inflow", "0", None) + .stock("s", "result", &["inflow"], &[], None); + + let vm = build_vm(&tp); + let ops = flow_opcodes(&vm); + let has_binop_curr = ops + .iter() + .any(|op| matches!(op, Opcode::BinOpAssignCurr { .. 
})); + assert!( + has_binop_curr, + "binary operation with assign should produce BinOpAssignCurr" + ); + } + + #[test] + fn test_binop_assign_curr_simulation_mul() { + let tp = TestProject::new("binop_mul") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "3", None) + .aux("b", "4", None) + .aux("result", "a * b", None) + .flow("inflow", "0", None) + .stock("s", "result", &["inflow"], &[], None); + + let vm_results = tp.run_vm().unwrap(); + assert!( + (vm_results["result"][0] - 12.0).abs() < 1e-10, + "3 * 4 should equal 12" + ); + } + + // ----------------------------------------------------------------------- + // BinOpAssignNext: stock integration `stock_next = stock + flow * dt` + // ----------------------------------------------------------------------- + + #[test] + fn test_binop_assign_next_present_in_bytecode() { + let tp = TestProject::new("stock_integ") + .with_sim_time(0.0, 2.0, 1.0) + .flow("inflow", "10", None) + .stock("s", "0", &["inflow"], &[], None); + + let vm = build_vm(&tp); + let ops = stock_opcodes(&vm); + let has_binop_next = ops + .iter() + .any(|op| matches!(op, Opcode::BinOpAssignNext { .. })); + assert!( + has_binop_next, + "stock integration should produce BinOpAssignNext in stock bytecode" + ); + } + + #[test] + fn test_binop_assign_next_simulation_stock_integration() { + let tp = TestProject::new("stock_integ_sim") + .with_sim_time(0.0, 5.0, 1.0) + .flow("inflow", "10", None) + .stock("s", "0", &["inflow"], &[], None); + + let vm_results = tp.run_vm().unwrap(); + let interp_results = tp.run_interpreter().unwrap(); + + let vm_s = &vm_results["s"]; + let interp_s = &interp_results["s"]; + + for (i, (v, e)) in vm_s.iter().zip(interp_s.iter()).enumerate() { + assert!( + (v - e).abs() < 1e-10, + "stock mismatch at step {i}: vm={v}, interp={e}" + ); + } + // s starts at 0, inflow=10, dt=1 => s at step 1 = 10, step 2 = 20, etc. 
+ assert!((vm_s[0] - 0.0).abs() < 1e-10, "stock initial should be 0"); + assert!( + (vm_s[1] - 10.0).abs() < 1e-10, + "stock at step 1 should be 10" + ); + } + + // ----------------------------------------------------------------------- + // Op2 variants through BinOpAssignCurr + // ----------------------------------------------------------------------- + + fn run_binop_model(equation: &str) -> f64 { + let tp = TestProject::new("binop_test") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "10", None) + .aux("b", "3", None) + .aux("result", equation, None) + .flow("inflow", "0", None) + .stock("s", "result", &["inflow"], &[], None); + + let vm_results = tp.run_vm().unwrap(); + vm_results["result"][0] + } + + #[test] + fn test_op2_add() { + let result = run_binop_model("a + b"); + assert!((result - 13.0).abs() < 1e-10, "10 + 3 = 13, got {result}"); + } + + #[test] + fn test_op2_sub() { + let result = run_binop_model("a - b"); + assert!((result - 7.0).abs() < 1e-10, "10 - 3 = 7, got {result}"); + } + + #[test] + fn test_op2_mul() { + let result = run_binop_model("a * b"); + assert!((result - 30.0).abs() < 1e-10, "10 * 3 = 30, got {result}"); + } + + #[test] + fn test_op2_div() { + let result = run_binop_model("a / b"); + assert!((result - 10.0 / 3.0).abs() < 1e-10, "10 / 3, got {result}"); + } + + #[test] + fn test_op2_gt() { + let result = run_binop_model("IF a > b THEN 1 ELSE 0"); + assert!( + (result - 1.0).abs() < 1e-10, + "10 > 3 should be true, got {result}" + ); + } + + #[test] + fn test_op2_lt() { + let result = run_binop_model("IF a < b THEN 1 ELSE 0"); + assert!( + (result - 0.0).abs() < 1e-10, + "10 < 3 should be false, got {result}" + ); + } + + #[test] + fn test_op2_eq() { + // a=10, b=3, so a=b should be false + let tp = TestProject::new("eq_test") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "5", None) + .aux("b", "5", None) + .aux("result", "IF a = b THEN 1 ELSE 0", None) + .flow("inflow", "0", None) + .stock("s", "result", &["inflow"], &[], None); + let 
vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!( + (result - 1.0).abs() < 1e-10, + "5 = 5 should be true, got {result}" + ); + } + + #[test] + fn test_op2_and() { + let tp = TestProject::new("and_test") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "1", None) + .aux("b", "1", None) + .aux("result", "IF (a > 0) AND (b > 0) THEN 1 ELSE 0", None) + .flow("inflow", "0", None) + .stock("s", "result", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!( + (result - 1.0).abs() < 1e-10, + "1>0 AND 1>0 should be true, got {result}" + ); + } + + #[test] + fn test_op2_or() { + let tp = TestProject::new("or_test") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "0", None) + .aux("b", "1", None) + .aux("result", "IF (a > 0) OR (b > 0) THEN 1 ELSE 0", None) + .flow("inflow", "0", None) + .stock("s", "result", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!( + (result - 1.0).abs() < 1e-10, + "0>0 OR 1>0 should be true, got {result}" + ); + } + + // ----------------------------------------------------------------------- + // Superinstruction execution correctness across multiple timesteps + // ----------------------------------------------------------------------- + + #[test] + fn test_superinstruction_population_model_matches_interpreter() { + let tp = TestProject::new("pop_model") + .with_sim_time(0.0, 10.0, 0.5) + .aux("birth_rate", "0.1", None) + .aux("death_rate", "0.05", None) + .flow("births", "population * birth_rate", None) + .flow("deaths", "population * death_rate", None) + .stock("population", "1000", &["births"], &["deaths"], None); + + let vm_results = tp.run_vm().unwrap(); + let interp_results = tp.run_interpreter().unwrap(); + + for var in &["population", "births", "deaths", "birth_rate", "death_rate"] { + let vm_vals = &vm_results[*var]; + let interp_vals = &interp_results[*var]; + assert_eq!( + 
vm_vals.len(), + interp_vals.len(), + "step count mismatch for {var}" + ); + for (i, (v, e)) in vm_vals.iter().zip(interp_vals.iter()).enumerate() { + assert!( + (v - e).abs() < 1e-10, + "{var} mismatch at step {i}: vm={v}, interp={e}" + ); + } + } + } + + #[test] + fn test_superinstruction_with_small_dt() { + let tp = TestProject::new("small_dt") + .with_sim_time(0.0, 1.0, 0.125) + .aux("rate", "0.5", None) + .flow("growth", "s * rate", None) + .stock("s", "10", &["growth"], &[], None); + + let vm_results = tp.run_vm().unwrap(); + let interp_results = tp.run_interpreter().unwrap(); + + let vm_s = &vm_results["s"]; + let interp_s = &interp_results["s"]; + for (i, (v, e)) in vm_s.iter().zip(interp_s.iter()).enumerate() { + assert!( + (v - e).abs() < 1e-10, + "s mismatch at step {i}: vm={v}, interp={e}" + ); + } + } + + // ----------------------------------------------------------------------- + // Op2 variants through *fused* BinOpAssignCurr superinstruction. + // The run_binop_model tests above use IF/THEN/ELSE which goes through + // SetCond+If, not the fused path. These tests use direct assignment + // to ensure the BinOpAssignCurr handler is exercised for each Op2. 
+ // ----------------------------------------------------------------------- + + fn run_fused_binop(equation: &str) -> f64 { + // equation should be a direct binary op like "a ^ b" assigned to result + let tp = TestProject::new("fused_binop") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "10", None) + .aux("b", "3", None) + .aux("result", equation, None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + vm_results["result"][0] + } + + #[test] + fn test_fused_binop_exp() { + let result = run_fused_binop("a ^ b"); + assert!((result - 1000.0).abs() < 1e-10, "10^3 = 1000, got {result}"); + } + + #[test] + fn test_fused_binop_div() { + let result = run_fused_binop("a / b"); + assert!((result - 10.0 / 3.0).abs() < 1e-10, "10/3, got {result}"); + } + + #[test] + fn test_fused_binop_mod() { + let result = run_fused_binop("a MOD b"); + assert!((result - 1.0).abs() < 1e-10, "10 mod 3 = 1, got {result}"); + } + + #[test] + fn test_fused_binop_gt() { + let result = run_fused_binop("a > b"); + assert!((result - 1.0).abs() < 1e-10, "10 > 3 = 1, got {result}"); + } + + #[test] + fn test_fused_binop_gte() { + let result = run_fused_binop("a >= b"); + assert!((result - 1.0).abs() < 1e-10, "10 >= 3 = 1, got {result}"); + } + + #[test] + fn test_fused_binop_lt() { + let result = run_fused_binop("a < b"); + assert!((result - 0.0).abs() < 1e-10, "10 < 3 = 0, got {result}"); + } + + #[test] + fn test_fused_binop_lte() { + let result = run_fused_binop("a <= b"); + assert!((result - 0.0).abs() < 1e-10, "10 <= 3 = 0, got {result}"); + } + + #[test] + fn test_fused_binop_eq() { + // Use equal values so we test the true case + let tp = TestProject::new("fused_eq") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "5", None) + .aux("b", "5", None) + .aux("result", "a = b", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + 
assert!((result - 1.0).abs() < 1e-10, "5 = 5 = 1, got {result}"); + } + + #[test] + fn test_fused_binop_and() { + let tp = TestProject::new("fused_and") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "1", None) + .aux("b", "1", None) + .aux("result", "a AND b", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!((result - 1.0).abs() < 1e-10, "1 AND 1 = 1, got {result}"); + } + + #[test] + fn test_fused_binop_or() { + let tp = TestProject::new("fused_or") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "0", None) + .aux("b", "1", None) + .aux("result", "a OR b", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!((result - 1.0).abs() < 1e-10, "0 OR 1 = 1, got {result}"); + } + + // ----------------------------------------------------------------------- + // Op2 variants through fused BinOpAssignNext (stock integration) + // ----------------------------------------------------------------------- + + #[test] + fn test_fused_binop_next_sub() { + // stock with only outflow exercises Sub in AssignNext + let tp = TestProject::new("fused_next_sub") + .with_sim_time(0.0, 3.0, 1.0) + .flow("outflow", "5", None) + .stock("s", "100", &[], &["outflow"], None); + let vm_results = tp.run_vm().unwrap(); + let interp_results = tp.run_interpreter().unwrap(); + let vm_s = &vm_results["s"]; + let interp_s = &interp_results["s"]; + for (i, (v, e)) in vm_s.iter().zip(interp_s.iter()).enumerate() { + assert!( + (v - e).abs() < 1e-10, + "s mismatch at step {i}: vm={v}, interp={e}" + ); + } + assert!((vm_s[0] - 100.0).abs() < 1e-10, "initial should be 100"); + assert!( + (vm_s[1] - 95.0).abs() < 1e-10, + "step 1 should be 95 (100 - 5)" + ); + } + + // ----------------------------------------------------------------------- + // Unfused Op2 path: operations consumed 
by further stack ops + // ----------------------------------------------------------------------- + + #[test] + fn test_unfused_op2_exp_in_expression() { + // a^b + 1: the ^ result feeds into +, so Op2::Exp can't be fused with Assign + let tp = TestProject::new("unfused_exp") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "2", None) + .aux("b", "3", None) + .aux("result", "a ^ b + 1", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!((result - 9.0).abs() < 1e-10, "2^3 + 1 = 9, got {result}"); + } + + #[test] + fn test_unfused_op2_div_in_expression() { + let tp = TestProject::new("unfused_div") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "10", None) + .aux("b", "4", None) + .aux("result", "a / b + 1", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!((result - 3.5).abs() < 1e-10, "10/4 + 1 = 3.5, got {result}"); + } + + #[test] + fn test_unfused_op2_mod_in_expression() { + let tp = TestProject::new("unfused_mod") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "10", None) + .aux("b", "3", None) + .aux("result", "a MOD b + 1", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!( + (result - 2.0).abs() < 1e-10, + "10 mod 3 + 1 = 2, got {result}" + ); + } + + #[test] + fn test_unfused_not_operator() { + let tp = TestProject::new("unfused_not") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "0", None) + .aux("result", "NOT a", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["result"][0]; + assert!((result - 1.0).abs() < 1e-10, "NOT 0 = 1, got {result}"); + } + + #[test] + fn test_unfused_comparison_gte_lte_in_expression() { + // Use 
>= and <= as intermediate values consumed by further ops + let tp = TestProject::new("unfused_cmp") + .with_sim_time(0.0, 1.0, 1.0) + .aux("a", "5", None) + .aux("b", "5", None) + .aux("gte_result", "(a >= b) + (a <= b)", None) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + let vm_results = tp.run_vm().unwrap(); + let result = vm_results["gte_result"][0]; + assert!( + (result - 2.0).abs() < 1e-10, + "(5>=5) + (5<=5) = 1+1 = 2, got {result}" + ); + } + + #[test] + fn test_multiple_superinstructions_in_one_model() { + let tp = TestProject::new("multi_super") + .with_sim_time(0.0, 3.0, 1.0) + .aux("const_a", "2", None) + .aux("const_b", "3", None) + .aux("product", "const_a * const_b", None) + .aux("sum", "const_a + const_b", None) + .flow("inflow", "product + sum", None) + .stock("s", "0", &["inflow"], &[], None); + + let vm = build_vm(&tp); + let ops = flow_opcodes(&vm); + + // There should be at least 2 AssignConstCurr (for const_a, const_b) + let const_count = ops + .iter() + .filter(|op| matches!(op, Opcode::AssignConstCurr { .. 
})) + .count(); + assert!( + const_count >= 2, + "expected at least 2 AssignConstCurr, got {const_count}" + ); + + let vm_results = tp.run_vm().unwrap(); + let interp_results = tp.run_interpreter().unwrap(); + + // product = 2*3 = 6, sum = 2+3 = 5, inflow = 11 + // s starts at 0, gains 11 per step + let vm_s = &vm_results["s"]; + let interp_s = &interp_results["s"]; + for (i, (v, e)) in vm_s.iter().zip(interp_s.iter()).enumerate() { + assert!( + (v - e).abs() < 1e-10, + "s mismatch at step {i}: vm={v}, interp={e}" + ); + } + assert!( + (vm_s[1] - 11.0).abs() < 1e-10, + "s at step 1 should be 11, got {}", + vm_s[1] + ); + } +} + +#[cfg(test)] +mod vm_reset_run_to_and_constants_tests { + use super::*; + use crate::canonicalize; + use crate::datamodel; + use crate::test_common::TestProject; + + fn pop_model() -> TestProject { + TestProject::new("pop_model") + .with_sim_time(0.0, 100.0, 1.0) + .aux("birth_rate", "0.1", None) + .flow("births", "population * birth_rate", None) + .flow("deaths", "population / 80", None) + .stock("population", "100", &["births"], &["deaths"], None) + } + + fn build_compiled(tp: &TestProject) -> CompiledSimulation { + let sim = tp.build_sim().unwrap(); + sim.compile().unwrap() + } + + // ================================================================ + // Multiple reset cycles + // ================================================================ + + #[test] + fn test_multiple_reset_cycles_produce_identical_results() { + let compiled = build_compiled(&pop_model()); + let mut vm = Vm::new(compiled).unwrap(); + + vm.run_to_end().unwrap(); + let ref_series = vm.get_series(&canonicalize("population")).unwrap(); + + for cycle in 1..=5 { + vm.reset(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("population")).unwrap(); + assert_eq!( + series.len(), + ref_series.len(), + "cycle {cycle}: series length should match" + ); + for (step, (a, b)) in ref_series.iter().zip(series.iter()).enumerate() { + assert!( + (a - b).abs() < 
1e-10, + "cycle {cycle}, step {step}: {a} vs {b}" + ); + } + } + } + + // ================================================================ + // Reset after partial run with different dt values + // ================================================================ + + #[test] + fn test_reset_after_partial_run_dt_quarter() { + let tp = TestProject::new("dt_quarter") + .with_sim_time(0.0, 10.0, 0.25) + .aux("rate", "0.05", None) + .flow("inflow", "stock * rate", None) + .stock("stock", "100", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + + let mut vm_ref = Vm::new(compiled.clone()).unwrap(); + vm_ref.run_to_end().unwrap(); + let ref_series = vm_ref.get_series(&canonicalize("stock")).unwrap(); + + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to(5.0).unwrap(); + vm.reset(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("stock")).unwrap(); + + assert_eq!(series.len(), ref_series.len()); + for (step, (a, b)) in ref_series.iter().zip(series.iter()).enumerate() { + assert!( + (a - b).abs() < 1e-10, + "step {step}: reference {a} vs reset {b}" + ); + } + } + + #[test] + fn test_reset_after_partial_run_dt_half() { + let tp = TestProject::new("dt_half") + .with_sim_time(0.0, 20.0, 0.5) + .aux("rate", "0.03", None) + .flow("inflow", "stock * rate", None) + .stock("stock", "50", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + + let mut vm_ref = Vm::new(compiled.clone()).unwrap(); + vm_ref.run_to_end().unwrap(); + let ref_series = vm_ref.get_series(&canonicalize("stock")).unwrap(); + + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to(10.0).unwrap(); + vm.reset(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("stock")).unwrap(); + + assert_eq!(series.len(), ref_series.len()); + for (step, (a, b)) in ref_series.iter().zip(series.iter()).enumerate() { + assert!( + (a - b).abs() < 1e-10, + "step {step}: reference {a} vs reset {b}" + ); + } + } + + // 
================================================================ + // Pre-filled constants verification + // ================================================================ + + #[test] + fn test_prefilled_constants_after_run_initials() { + let tp = TestProject::new("constants_check") + .with_sim_time(5.0, 50.0, 0.5) + .flow("inflow", "0", None) + .stock("s", "10", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_initials().unwrap(); + + assert_eq!(vm.get_value_now(TIME_OFF), 5.0); + assert_eq!(vm.get_value_now(DT_OFF), 0.5); + assert_eq!(vm.get_value_now(INITIAL_TIME_OFF), 5.0); + assert_eq!(vm.get_value_now(FINAL_TIME_OFF), 50.0); + + // DT/INITIAL_TIME/FINAL_TIME are pre-filled in every chunk slot during initials + let data = vm.data.as_ref().unwrap(); + let n_slots = vm.n_slots; + let total_chunks = vm.n_chunks + 2; + for chunk in 1..total_chunks { + let base = chunk * n_slots; + assert_eq!(data[base + DT_OFF], 0.5, "DT in chunk {chunk}"); + assert_eq!( + data[base + INITIAL_TIME_OFF], + 5.0, + "INITIAL_TIME in chunk {chunk}" + ); + assert_eq!( + data[base + FINAL_TIME_OFF], + 50.0, + "FINAL_TIME in chunk {chunk}" + ); + } + } + + #[test] + fn test_constants_remain_correct_throughout_simulation() { + let tp = TestProject::new("constants_during_sim") + .with_sim_time(0.0, 10.0, 1.0) + .flow("inflow", "1", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + + let data = vm.data.as_ref().unwrap(); + let n_slots = vm.n_slots; + for chunk in 0..vm.n_chunks { + let base = chunk * n_slots; + assert_eq!(data[base + DT_OFF], 1.0, "DT in chunk {chunk}"); + assert_eq!( + data[base + INITIAL_TIME_OFF], + 0.0, + "INITIAL_TIME in chunk {chunk}" + ); + assert_eq!( + data[base + FINAL_TIME_OFF], + 10.0, + "FINAL_TIME in chunk {chunk}" + ); + } + } + + // 
================================================================ + // TIME series correctness + // ================================================================ + + #[test] + fn test_time_advances_by_dt_each_step() { + let tp = TestProject::new("time_series") + .with_sim_time(0.0, 5.0, 1.0) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + + let data = vm.data.as_ref().unwrap(); + let n_slots = vm.n_slots; + for chunk in 0..vm.n_chunks { + let base = chunk * n_slots; + let expected_time = chunk as f64; + assert!( + (data[base + TIME_OFF] - expected_time).abs() < 1e-10, + "chunk {chunk}: TIME={}, expected {}", + data[base + TIME_OFF], + expected_time + ); + } + } + + #[test] + fn test_time_series_with_fractional_dt() { + // Use save_step=dt so every step is saved + let tp = TestProject::new_with_specs( + "time_frac", + datamodel::SimSpecs { + start: 0.0, + stop: 2.0, + dt: datamodel::Dt::Dt(0.25), + save_step: Some(datamodel::Dt::Dt(0.25)), + sim_method: datamodel::SimMethod::Euler, + time_units: Some("Month".to_string()), + }, + ) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + + let data = vm.data.as_ref().unwrap(); + let n_slots = vm.n_slots; + // Expected: 0.0, 0.25, 0.5, ..., 2.0 => 9 saved steps + let expected_steps = 9; + assert_eq!(vm.n_chunks, expected_steps); + for chunk in 0..vm.n_chunks { + let base = chunk * n_slots; + let expected_time = chunk as f64 * 0.25; + assert!( + (data[base + TIME_OFF] - expected_time).abs() < 1e-10, + "chunk {chunk}: TIME={}, expected {}", + data[base + TIME_OFF], + expected_time + ); + } + } + + #[test] + fn test_time_series_with_nonzero_start() { + let tp = TestProject::new("time_nonzero") + .with_sim_time(10.0, 15.0, 1.0) + .flow("inflow", "0", None) + 
.stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + + let data = vm.data.as_ref().unwrap(); + let n_slots = vm.n_slots; + for chunk in 0..vm.n_chunks { + let base = chunk * n_slots; + let expected_time = 10.0 + chunk as f64; + assert!( + (data[base + TIME_OFF] - expected_time).abs() < 1e-10, + "chunk {chunk}: TIME={}, expected {}", + data[base + TIME_OFF], + expected_time + ); + } + } + + // ================================================================ + // set_value_now / get_value_now + // ================================================================ + + #[test] + fn test_set_and_get_value_now() { + let tp = TestProject::new("set_get") + .with_sim_time(0.0, 10.0, 1.0) + .aux("rate", "0.1", None) + .flow("inflow", "stock * rate", None) + .stock("stock", "100", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_initials().unwrap(); + + let stock_off = vm.get_offset(&canonicalize("stock")).unwrap(); + + assert_eq!(vm.get_value_now(stock_off), 100.0); + + vm.set_value_now(stock_off, 42.0); + assert_eq!(vm.get_value_now(stock_off), 42.0); + + vm.set_value_now(stock_off, -7.5); + assert_eq!(vm.get_value_now(stock_off), -7.5); + } + + #[test] + fn test_set_value_now_for_special_offsets() { + let tp = TestProject::new("set_specials") + .with_sim_time(0.0, 10.0, 1.0) + .flow("inflow", "0", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_initials().unwrap(); + + assert_eq!(vm.get_value_now(TIME_OFF), 0.0); + assert_eq!(vm.get_value_now(DT_OFF), 1.0); + assert_eq!(vm.get_value_now(INITIAL_TIME_OFF), 0.0); + assert_eq!(vm.get_value_now(FINAL_TIME_OFF), 10.0); + + vm.set_value_now(TIME_OFF, 99.0); + assert_eq!(vm.get_value_now(TIME_OFF), 99.0); + } + + #[test] + fn 
test_set_value_now_after_run_initials_affects_simulation() { + let tp = TestProject::new("set_after_init") + .with_sim_time(0.0, 5.0, 1.0) + .flow("inflow", "stock * 0.1", None) + .stock("stock", "100", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + + let mut vm1 = Vm::new(compiled.clone()).unwrap(); + vm1.run_to_end().unwrap(); + let series1 = vm1.get_series(&canonicalize("stock")).unwrap(); + + let mut vm2 = Vm::new(compiled).unwrap(); + vm2.run_initials().unwrap(); + let stock_off = vm2.get_offset(&canonicalize("stock")).unwrap(); + vm2.set_value_now(stock_off, 200.0); + vm2.run_to_end().unwrap(); + let series2 = vm2.get_series(&canonicalize("stock")).unwrap(); + + assert_eq!(series1[0], 100.0); + assert_eq!(series2[0], 200.0); + for step in 1..series1.len() { + assert!( + series2[step] > series1[step], + "step {step}: stock with init=200 ({}) should be > stock with init=100 ({})", + series2[step], + series1[step] + ); + } + } + + // ================================================================ + // run_to with partial ranges + // ================================================================ + + #[test] + fn test_run_to_partial_then_continue_matches_full_run() { + let tp = pop_model(); + let compiled = build_compiled(&tp); + + let mut vm_full = Vm::new(compiled.clone()).unwrap(); + vm_full.run_to_end().unwrap(); + let full_series = vm_full.get_series(&canonicalize("population")).unwrap(); + + let mut vm_partial = Vm::new(compiled).unwrap(); + vm_partial.run_to(50.0).unwrap(); + vm_partial.run_to_end().unwrap(); + let partial_series = vm_partial.get_series(&canonicalize("population")).unwrap(); + + assert_eq!(full_series.len(), partial_series.len()); + for (step, (a, b)) in full_series.iter().zip(partial_series.iter()).enumerate() { + assert!( + (a - b).abs() < 1e-10, + "step {step}: full={a} vs partial+continue={b}" + ); + } + } + + #[test] + fn test_run_to_multiple_segments_matches_full_run() { + let tp = pop_model(); + let compiled = 
build_compiled(&tp); + + let mut vm_full = Vm::new(compiled.clone()).unwrap(); + vm_full.run_to_end().unwrap(); + let full_series = vm_full.get_series(&canonicalize("population")).unwrap(); + + let mut vm_seg = Vm::new(compiled).unwrap(); + vm_seg.run_to(25.0).unwrap(); + vm_seg.run_to(50.0).unwrap(); + vm_seg.run_to(75.0).unwrap(); + vm_seg.run_to_end().unwrap(); + let seg_series = vm_seg.get_series(&canonicalize("population")).unwrap(); + + assert_eq!(full_series.len(), seg_series.len()); + for (step, (a, b)) in full_series.iter().zip(seg_series.iter()).enumerate() { + assert!( + (a - b).abs() < 1e-10, + "step {step}: full={a} vs segmented={b}" + ); + } + } + + // ================================================================ + // Non-default save_every (save_step != dt) + // ================================================================ + + #[test] + fn test_save_every_2_with_dt_1() { + let tp = TestProject::new_with_specs( + "save_every_test", + datamodel::SimSpecs { + start: 0.0, + stop: 10.0, + dt: datamodel::Dt::Dt(1.0), + save_step: Some(datamodel::Dt::Dt(2.0)), + sim_method: datamodel::SimMethod::Euler, + time_units: Some("Month".to_string()), + }, + ) + .flow("inflow", "1", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("s")).unwrap(); + + // save_step=2, dt=1, start=0, stop=10: saved at t=0,2,4,6,8,10 => 6 points + assert_eq!(series.len(), 6, "should have 6 saved points"); + let expected = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0]; + for (i, (&actual, &exp)) in series.iter().zip(expected.iter()).enumerate() { + assert!( + (actual - exp).abs() < 1e-10, + "saved point {i}: actual={actual}, expected={exp}" + ); + } + } + + #[test] + fn test_save_every_with_fractional_dt() { + let tp = TestProject::new_with_specs( + "save_frac", + datamodel::SimSpecs { + start: 0.0, + stop: 4.0, + dt: datamodel::Dt::Dt(0.5), + 
save_step: Some(datamodel::Dt::Dt(1.0)), + sim_method: datamodel::SimMethod::Euler, + time_units: Some("Month".to_string()), + }, + ) + .flow("inflow", "2", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("s")).unwrap(); + + // save_step=1, dt=0.5, start=0, stop=4: saved at t=0,1,2,3,4 => 5 points + assert_eq!(series.len(), 5, "should have 5 saved points"); + // s increases by inflow*dt = 2*0.5 = 1.0 per dt step. + // At save points: t=0: 0, t=1: 2, t=2: 4, t=3: 6, t=4: 8 + let expected = [0.0, 2.0, 4.0, 6.0, 8.0]; + for (i, (&actual, &exp)) in series.iter().zip(expected.iter()).enumerate() { + assert!( + (actual - exp).abs() < 1e-10, + "saved point {i}: actual={actual}, expected={exp}" + ); + } + } + + #[test] + fn test_save_every_matches_dt_gives_all_steps() { + let tp = TestProject::new("save_all") + .with_sim_time(0.0, 5.0, 1.0) + .flow("inflow", "1", None) + .stock("s", "0", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("s")).unwrap(); + + assert_eq!(series.len(), 6, "should have 6 saved points"); + let expected = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]; + for (i, (&actual, &exp)) in series.iter().zip(expected.iter()).enumerate() { + assert!( + (actual - exp).abs() < 1e-10, + "saved point {i}: actual={actual}, expected={exp}" + ); + } + } + + // ================================================================ + // Reset clears temp_storage + // ================================================================ + + #[test] + fn test_reset_zeroes_temp_storage() { + let tp = pop_model(); + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + + vm.reset(); + + for (i, &val) in vm.temp_storage.iter().enumerate() { + assert_eq!(val, 0.0, 
"temp_storage[{i}] should be 0 after reset"); + } + } + + // ================================================================ + // Simulation produces correct numerical results + // ================================================================ + + #[test] + fn test_exponential_growth_euler() { + // ds/dt = s * 0.1, s(0) = 100, dt = 1 + let tp = TestProject::new("exp_growth") + .with_sim_time(0.0, 5.0, 1.0) + .flow("growth", "s * 0.1", None) + .stock("s", "100", &["growth"], &[], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("s")).unwrap(); + + // Euler: s(t+1) = s(t) * 1.1 + let expected = [100.0, 110.0, 121.0, 133.1, 146.41, 161.051]; + assert_eq!(series.len(), expected.len()); + for (i, (&actual, &exp)) in series.iter().zip(expected.iter()).enumerate() { + assert!( + (actual - exp).abs() < 1e-6, + "step {i}: actual={actual}, expected={exp}" + ); + } + } + + #[test] + fn test_decay_model_with_small_dt() { + // ds/dt = -s * 0.1, dt = 0.25, save_step = 0.25 so every step is saved + let tp = TestProject::new_with_specs( + "decay", + datamodel::SimSpecs { + start: 0.0, + stop: 1.0, + dt: datamodel::Dt::Dt(0.25), + save_step: Some(datamodel::Dt::Dt(0.25)), + sim_method: datamodel::SimMethod::Euler, + time_units: Some("Month".to_string()), + }, + ) + .flow("decay", "s * 0.1", None) + .stock("s", "100", &[], &["decay"], None); + + let compiled = build_compiled(&tp); + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("s")).unwrap(); + + // s(t+dt) = s(t) * (1 - 0.1*0.25) = s(t) * 0.975 + assert_eq!(series.len(), 5, "5 saved points at dt=0.25 from 0 to 1"); + let mut expected = 100.0; + assert!((series[0] - expected).abs() < 1e-10); + for step in 1..5 { + expected *= 0.975; + assert!( + (series[step] - expected).abs() < 1e-10, + "step {step}: actual={}, expected={}", + series[step], + expected + 
); + } + } + + // ================================================================ + // Reset with save_every > 1 + // ================================================================ + + #[test] + fn test_reset_with_save_every_produces_identical_results() { + let tp = TestProject::new_with_specs( + "save_reset", + datamodel::SimSpecs { + start: 0.0, + stop: 10.0, + dt: datamodel::Dt::Dt(0.5), + save_step: Some(datamodel::Dt::Dt(2.0)), + sim_method: datamodel::SimMethod::Euler, + time_units: Some("Month".to_string()), + }, + ) + .flow("inflow", "s * 0.1", None) + .stock("s", "100", &["inflow"], &[], None); + + let compiled = build_compiled(&tp); + + let mut vm_ref = Vm::new(compiled.clone()).unwrap(); + vm_ref.run_to_end().unwrap(); + let ref_series = vm_ref.get_series(&canonicalize("s")).unwrap(); + + let mut vm = Vm::new(compiled).unwrap(); + vm.run_to_end().unwrap(); + vm.reset(); + vm.run_to_end().unwrap(); + let series = vm.get_series(&canonicalize("s")).unwrap(); + + assert_eq!(ref_series.len(), series.len()); + for (step, (a, b)) in ref_series.iter().zip(series.iter()).enumerate() { + assert!( + (a - b).abs() < 1e-10, + "step {step}: reference={a} vs reset={b}" + ); + } + } +}