# Directory that is scanned for compiled '*.wasm' block files.
BLOCKS_DIR = Path('assets/wasm/blocks')
# Relative path '_site'; its use is not visible in this part of the file.
SITE_DIR = Path('_site')
2727
28- NON_EXECUTABLE = {
29- 'bash' , 'sh' , 'shell' , 'powershell' , 'cmd' ,
30- 'js' , 'javascript' , 'markdown' , 'dockerfile' ,
31- 'yaml' , 'toml' , 'json' , 'plaintext' , 'text' , 'output' , 'wat' , 'rust' , '' ,
32- }
33-
# Gather the stem (file name without the '.wasm' suffix) of every file in
# BLOCKS_DIR once, up front, so that checking whether a block hash is
# available is a plain set-membership test.
_available = {wasm_file.stem for wasm_file in BLOCKS_DIR.glob('*.wasm')}
3630
3731
38- def _lang_from_attrs (attrs : str ) -> str :
39- m = re .search (r'language-(\w+)' , attrs or '' )
40- return m .group (1 ).lower () if m else ''
41-
42-
4332def _text_content (html_str : str ) -> str :
4433 """Strip HTML tags and decode entities — equivalent to element.textContent."""
4534 return html_mod .unescape (re .sub (r'<[^>]+>' , '' , html_str ))
@@ -49,28 +38,25 @@ def _sha16(text: str) -> str:
4938 return hashlib .sha256 (text .encode ('utf-8' )).hexdigest ()[:16 ]
5039
5140
52- # Matches <pre ...><code class="...">...</code></pre> (single code child).
41+ # Matches <pre ...><code ...>...</code></pre>.
42+ # Language filtering is intentionally omitted: compile_blocks.py only produced
43+ # WASM for valid blocks, so hash lookup is the only gate needed.
5344_PRE_RE = re .compile (
54- r'(<pre\b)([^>]*)(>)\s*(<code\b( [^>]*) >)(.*?)</code>\s*</pre>' ,
45+ r'(<pre\b)([^>]*)(>)\s*(<code\b[^>]*>)(.*?)</code>\s*</pre>' ,
5546 re .DOTALL ,
5647)
5748
5849
5950def _inject (content : str ) -> str :
6051 def _replace (m : re .Match ) -> str :
61- pre_tag = m .group (1 ) # '<pre'
62- pre_attrs = m .group (2 ) # everything between '<pre' and '>'
63- pre_close = m .group (3 ) # '>'
64- code_open = m .group (4 ) # full '<code ...>'
65- code_attrs = m .group (5 ) # attrs inside <code>
66- code_body = m .group (6 ) # raw HTML inside <code>
67-
68- # Skip if hash already injected (idempotent).
69- if 'data-block-hash' in pre_attrs :
70- return m .group (0 )
52+ pre_tag = m .group (1 ) # '<pre'
53+ pre_attrs = m .group (2 ) # attrs between '<pre' and '>'
54+ pre_close = m .group (3 ) # '>'
55+ code_open = m .group (4 ) # full '<code ...>'
56+ code_body = m .group (5 ) # raw HTML inside <code>
7157
72- lang = _lang_from_attrs ( code_attrs )
73- if lang in NON_EXECUTABLE :
58+ # Idempotent.
59+ if 'data-block-hash' in pre_attrs :
7460 return m .group (0 )
7561
7662 text = _text_content (code_body ).strip ()
0 commit comments