diff --git a/scripts/toolchain.lock.sh b/scripts/toolchain.lock.sh index a9057a2..94ac0bd 100755 --- a/scripts/toolchain.lock.sh +++ b/scripts/toolchain.lock.sh @@ -11,4 +11,4 @@ readonly CIRCT_REF_LOCKED="3003e9a7d0af8fe09105fa89b3584bd1e2eb7410" readonly CIRCT_LLVM_SUBMODULE_REF_LOCKED="aa3d6b37c7945bfb4c261dd994689de2a2de25bf" readonly SURFER_ARTIFACT_URL_LOCKED="https://gitlab.com/surfer-project/surfer/-/jobs/artifacts/main/download?job=pages_build" -readonly SURFER_ARTIFACT_SHA256_LOCKED="2a684122436e7a7729cc4e57062fdc2ce8ec5fa096d84ca383dd59011012b873" +readonly SURFER_ARTIFACT_SHA256_LOCKED="abf8d4c3415d445bf86edb39dda9ec9f37d20ccddf4069ec925acb608dcb661b" diff --git a/src/lessons/sv/always-ff/description.html b/src/lessons/sv/always-ff/description.html index fd8a251..dcf4671 100644 --- a/src/lessons/sv/always-ff/description.html +++ b/src/lessons/sv/always-ff/description.html @@ -1,17 +1,38 @@ + + +
+ + + + +always @(event) begin ... end is a block that runs every time a specified event fires.
If the event is a clock edge (e.g. posedge clk) we typically use always_ff instead, where "ff" stands for "flip-flop".
A flip-flop is a 1-bit memory element that captures its input (d) at a clock edge and holds it until the next edge. @@ -24,57 +45,89 @@ end // step 2: both mem and out update simultaneously
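The flip-flop described above can be written in a few lines. This is a generic illustration, not code from the lesson files:

```systemverilog
// A single D flip-flop: q captures d at each rising clock edge
// and holds it until the next edge.
module dff (
  input  logic clk,
  input  logic d,
  output logic q
);
  always_ff @(posedge clk) begin
    q <= d;  // sample d now; q holds it until the next posedge
  end
endmodule
```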
- We use non-blocking assignment (<=) inside always_ff. It works in two steps: first, all right-hand sides are sampled using current values; then all left-hand sides update simultaneously. So out always captures the value mem held before this edge — creating a true one-cycle delay, not a zero-delay pass-through. The same rule is why a <= b; b <= a; correctly swaps two flip-flops.
-An SRAM is an array of flip-flops — one per bit — indexed by address.
+ We use non-blocking assignment (<=) inside always_ff. It works in two steps: first, all right-hand sides are sampled using current values; then all left-hand sides update simultaneously. So out always captures the value mem held before this edge — creating a true one-cycle delay, not a zero-delay pass-through. The same rule is why a <= b; b <= a; correctly swaps two flip-flops.
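As a concrete illustration of the sampling rule, here is the classic register swap written out, assuming a and b are logic registers clocked by clk:

```systemverilog
logic a, b;

always_ff @(posedge clk) begin
  a <= b;  // RHS b is sampled at its pre-edge value
  b <= a;  // RHS a is also sampled at its pre-edge value, so the two swap
end
// With blocking '=' instead, the first assignment would complete before
// the second read a, leaving both registers holding b's old value.
```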
+
The names describe how each operator behaves in the flow of your procedural code — whether the assignment blocks (pauses) execution until it completes.
+ +Blocking = — execution stops and waits. The assignment completes immediately, in place, before the next line runs. Think of it like hand-delivering a letter: the recipient has it before you walk away.
+a = b; // a gets b's value RIGHT NOW
+c = a; // c sees the new value of a
+
+
Non-blocking <= — execution continues without waiting. The assignment schedules a write for later and immediately moves on. Think of it like dropping a letter in a mailbox: you keep walking and it gets delivered later, when the NBA update region runs.
+a <= b; // schedules a write to a, but doesn't apply it yet
+c <= a; // c gets a's OLD value — the write above hasn't happened yet
+
+
All right-hand sides are evaluated first, then all writes happen together at the end of the time step. This is what makes always_ff correctly model real hardware, where all flip-flops in a clocked stage sample their inputs and update their outputs simultaneously.
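One sketch of the consequence, assuming a two-stage pipeline with signals din, stage1, stage2 (names invented for illustration):

```systemverilog
// Non-blocking: two real flip-flops; din reaches stage2 after 2 cycles.
always_ff @(posedge clk) begin
  stage1 <= din;
  stage2 <= stage1;  // sees stage1's pre-edge value
end

// Blocking (an anti-pattern in clocked code): the first assignment
// completes before the second reads stage1, so din would reach stage2
// in a single cycle and the intended pipeline stage disappears.
// always @(posedge clk) begin
//   stage1 = din;
//   stage2 = stage1;
// end
```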
| Context | Use <= (non-blocking)? | Use = (blocking)? |
|---|---|---|
| always_ff / clocked blocks | ✅ Preferred | ⚠️ Avoid |
| Tasks & functions | ⚠️ Only for static signals | ✅ Correct choice |
| Automatic task output ports | ❌ Forbidden | ✅ Required |
An SRAM is an array of flip-flops — one per bit — indexed by address.
We'll need a slightly more advanced pattern to model that array.
We also use a port we (write enable) to control when writes happen, and a separate port rdata for the read result.
In sram_core.sv fill in the always_ff body with two statements:
when we is high, write wdata into mem[addr]
copy mem[addr] into rdata
The read is registered: drive
addr on cycle N and rdata reflects that address on cycle N+1. This is the standard synchronous-read SRAM model.
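Under the ports named above (we, addr, wdata, rdata and the mem array), that behaviour corresponds to a body of roughly this shape. Treat it as a sketch, not necessarily the lesson's reference solution:

```systemverilog
always_ff @(posedge clk) begin
  if (we)
    mem[addr] <= wdata;  // write only when write-enable is high
  rdata <= mem[addr];    // registered read: value appears one cycle later
end
```

Because both statements use <=, a write and a read of the same address in the same cycle return the old data (read-before-write), which matches the cycle-N / cycle-N+1 timing described here.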
-Here automatic means the task is re-entrant: it can be called recursively or from multiple places without interference.
-Non-automatic tasks share state across calls.
+Here automatic means the task is re-entrant: it can be called recursively or from multiple places without interference — just like a normal C/C++ function whose local variables live on the stack, with a fresh copy created for each call.
+A non-automatic (static) task behaves like a C function where every local variable is declared static: all calls share the same memory, so concurrent calls will overwrite each other's state.
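The difference can be seen in a small sketch; the task name and body are invented for illustration:

```systemverilog
// Re-entrant: each call gets its own fresh copy of i, so calls from
// several initial blocks (or recursive calls) never interfere.
task automatic count_down(input int n);
  int i;
  for (i = n; i > 0; i--) $display("%0d", i);
endtask

// Static (no 'automatic'): every call shares the single i below, so two
// concurrent calls trample each other's loop counter, exactly like a C
// function whose locals are declared 'static'.
task count_down_static(input int n);
  int i;
  for (i = n; i > 0; i--) $display("%0d", i);
endtask
```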
Calling a task write_word(addr, data) is blocking.
@@ -21,6 +21,9 @@
write_word(vif, addr, data) — a task that drives one write transaction: assert we, set addr and wdata, wait one clock edge, then de-assert we.read_word(vif, addr, data) — a task that drives one read transaction: set addr, wait one clock edge, then capture rdata.+ Waiting for
@(posedge clk) is not enough on its own. The testbench and the DUT are both sensitive to the same edge, so driving or sampling signals at the edge puts you in a race against the simulator's scheduler. The safe pattern is to wait for the edge and then advance a small delta — @(posedge clk); #1; — so that your assignments land in a quiet moment after the DUT has already reacted to the clock. +
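Putting the edge-then-delta pattern into write_word might look like this. The data widths and interface details are assumptions; only the we/addr/wdata names, the mem_if interface, and the transaction steps come from the text:

```systemverilog
task automatic write_word(virtual mem_if vif,
                          input logic [7:0] addr,
                          input logic [7:0] data);
  @(posedge vif.clk); #1;  // settle just after the edge the DUT acted on
  vif.we    = 1'b1;        // blocking '=' is the right choice inside a task
  vif.addr  = addr;
  vif.wdata = data;
  @(posedge vif.clk); #1;  // DUT samples the write on this edge
  vif.we    = 1'b0;        // de-assert so only one word is written
endtask
```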
These are the exact helper routines a UVM driver uses internally. In Part 3 the driver wraps them in a class method that pulls transactions from a sequencer — but the core protocol logic is the same.
The initial block calls write_word and read_word using the shared mem_if virtual interface, then checks the parity of the returned data with parity_check.