diff --git a/crates/fork-instrument/src/lib.rs b/crates/fork-instrument/src/lib.rs index da9b00812..dea0ee046 100644 --- a/crates/fork-instrument/src/lib.rs +++ b/crates/fork-instrument/src/lib.rs @@ -84,6 +84,7 @@ pub fn analyze(input: &[u8], opts: &Options) -> Result { /// tool is invoked by build scripts across programs that may or may /// not use `fork()`. pub fn instrument(input: &[u8], opts: &Options) -> Result> { + let leading_dylink_section = leading_dylink_section(input); let mut module = walrus::Module::from_buffer(input) .context("failed to parse input wasm module")?; @@ -133,6 +134,57 @@ pub fn instrument(input: &[u8], opts: &Options) -> Result> { // see `instrument_one_function_switch` / `instrument_one_function_nested_switch` // for the actual transform. - let output = module.emit_wasm(); + let mut output = module.emit_wasm(); + if let Some(section) = leading_dylink_section { + // Walrus does not preserve arbitrary custom sections on re-emit. + // For side modules, dylink.0 must stay first so the dynamic linker can + // allocate memory/table requirements before instantiation. + output.splice(8..8, section.iter().copied()); + } Ok(output) } + +fn leading_dylink_section(input: &[u8]) -> Option> { + if input.len() < 8 || &input[0..4] != b"\0asm" { + return None; + } + let mut offset = 8usize; + let section_start = offset; + let section_id = *input.get(offset)?; + offset += 1; + if section_id != 0 { + return None; + } + let size = read_var_u32(input, &mut offset)? as usize; + let payload_start = offset; + let payload_end = payload_start.checked_add(size)?; + if payload_end > input.len() { + return None; + } + let name_len = read_var_u32(input, &mut offset)? as usize; + let name_end = offset.checked_add(name_len)?; + if name_end > payload_end { + return None; + } + if &input[offset..name_end] != b"dylink.0" { + return None; + } + Some(input[section_start..payload_end].to_vec()) +} + +fn read_var_u32(input: &[u8], offset: &mut usize) -> Option { + let mut result = 0u32; + let mut shift = 0u32; + loop { + let byte = *input.get(*offset)?; + *offset += 1; + result |= u32::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some(result); + } + shift += 7; + if shift >= 35 { + return None; + } + } +} diff --git a/crates/fork-instrument/src/main.rs b/crates/fork-instrument/src/main.rs index dde9b50a9..29023af5f 100644 --- a/crates/fork-instrument/src/main.rs +++ b/crates/fork-instrument/src/main.rs @@ -13,7 +13,9 @@ use anyhow::{Context, Result}; use clap::Parser; use std::fs; -use std::path::PathBuf; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; +use std::path::{Path, PathBuf}; use fork_instrument::{Options, analyze, instrument}; @@ -71,10 +73,31 @@ fn main() -> Result<()> { fs::write(output_path, &output) .with_context(|| format!("writing output: {}", output_path.display()))?; + preserve_input_permissions(&cli.input, output_path)?; Ok(()) } +#[cfg(unix)] +fn preserve_input_permissions(input_path: &Path, output_path: &Path) -> Result<()> { + let input_mode = fs::metadata(input_path) + .with_context(|| format!("stat input for permissions: {}", input_path.display()))? + .permissions() + .mode(); + let mut output_permissions = fs::metadata(output_path) + .with_context(|| format!("stat output for permissions: {}", output_path.display()))? + .permissions(); + output_permissions.set_mode(input_mode); + fs::set_permissions(output_path, output_permissions) + .with_context(|| format!("setting output permissions: {}", output_path.display()))?; + Ok(()) +} + +#[cfg(not(unix))] +fn preserve_input_permissions(_input_path: &Path, _output_path: &Path) -> Result<()> { + Ok(()) +} + fn print_analysis_json(analysis: &fork_instrument::Analysis) { // Hand-rolled JSON to avoid a serde dependency for a tiny output. // Format is one-entry-per-line array of `{name, is_import}` objects. diff --git a/crates/fork-instrument/tests/instrument.rs b/crates/fork-instrument/tests/instrument.rs index dad3ce977..f1060d0e7 100644 --- a/crates/fork-instrument/tests/instrument.rs +++ b/crates/fork-instrument/tests/instrument.rs @@ -18,10 +18,10 @@ use std::collections::HashSet; use fork_instrument::runtime::names as runtime_names; -use fork_instrument::{Options, instrument}; +use fork_instrument::{instrument, Options}; use walrus::{ - ExportItem, FunctionId, FunctionKind, LocalFunction, Module, ir::{self, Instr, InstrSeqId}, + ExportItem, FunctionId, FunctionKind, LocalFunction, Module, }; // --- Helpers ---------------------------------------------------------- @@ -41,6 +41,49 @@ fn validate(bytes: &[u8]) { validator.validate_all(bytes).expect("valid wasm"); } +fn insert_leading_dylink_section(mut wasm: Vec) -> Vec { + // Minimal dylink.0 section with WASM_DYLINK_MEM_INFO + // {memorySize=0, memoryAlign=0, tableSize=0, tableAlign=0}. + let dylink = [ + 0x00, // custom section + 0x0f, // payload size + 0x08, b'd', b'y', b'l', b'i', b'n', b'k', b'.', b'0', 0x01, // WASM_DYLINK_MEM_INFO + 0x04, // subsection payload size + 0x00, 0x00, 0x00, 0x00, + ]; + wasm.splice(8..8, dylink); + wasm +} + +fn first_custom_section_name(bytes: &[u8]) -> Option { + let mut offset = 8usize; + if bytes.get(offset).copied()? != 0 { + return None; + } + offset += 1; + let _section_size = read_var_u32(bytes, &mut offset)?; + let name_len = read_var_u32(bytes, &mut offset)? as usize; + let name = bytes.get(offset..offset + name_len)?; + Some(String::from_utf8_lossy(name).into_owned()) +} + +fn read_var_u32(bytes: &[u8], offset: &mut usize) -> Option { + let mut result = 0u32; + let mut shift = 0u32; + loop { + let byte = *bytes.get(*offset)?; + *offset += 1; + result |= u32::from(byte & 0x7f) << shift; + if byte & 0x80 == 0 { + return Some(result); + } + shift += 7; + if shift >= 35 { + return None; + } + } +} + fn func_by_name(module: &Module, name: &str) -> FunctionId { module .funcs @@ -57,6 +100,34 @@ fn local_func(module: &Module, id: FunctionId) -> &LocalFunction { } } +#[test] +fn preserves_leading_dylink_section_for_side_modules() { + let input = insert_leading_dylink_section(parse_wat( + r#" + (module + (import "env" "fork" (func $fork (result i32))) + (memory (import "env" "memory") 1) + (func $call_fork (export "call_fork") (result i32) + (call $fork))) + "#, + )); + + let output = instrument( + &input, + &Options { + entry_import: "env.fork".into(), + }, + ) + .expect("instrument side module"); + + validate(&output); + assert_eq!( + first_custom_section_name(&output).as_deref(), + Some("dylink.0"), + "dynamic-linking side modules must keep dylink.0 as the first section", + ); +} + fn entry_instr_kinds(module: &Module, id: FunctionId) -> Vec { let f = local_func(module, id); f.block(f.entry_block()) diff --git a/crates/kernel/src/syscalls.rs b/crates/kernel/src/syscalls.rs index 28a48f0bb..be2d3a9d7 100644 --- a/crates/kernel/src/syscalls.rs +++ b/crates/kernel/src/syscalls.rs @@ -5486,12 +5486,11 @@ pub fn sys_nanosleep( pub fn sys_clock_getres(_proc: &Process, clock_id: u32) -> Result { use wasm_posix_shared::clock::*; match clock_id { - CLOCK_REALTIME | CLOCK_MONOTONIC | CLOCK_PROCESS_CPUTIME_ID | CLOCK_THREAD_CPUTIME_ID => { - Ok(WasmTimespec { - tv_sec: 0, - tv_nsec: 1_000_000, - }) // 1ms - } + CLOCK_REALTIME | CLOCK_MONOTONIC | CLOCK_PROCESS_CPUTIME_ID | CLOCK_THREAD_CPUTIME_ID + | CLOCK_BOOTTIME => Ok(WasmTimespec { + tv_sec: 0, + tv_nsec: 1_000_000, + }), // 1ms id if (id & 7) == 2 => { // Per-process CPU clock: clock_getcpuclockid encodes as (-pid-1)*8 + 2 Ok(WasmTimespec { @@ -5515,7 +5514,7 @@ pub fn sys_clock_nanosleep( ) -> Result<(), Errno> { use wasm_posix_shared::clock::*; // Validate clock_id - if clock_id != CLOCK_REALTIME && clock_id != CLOCK_MONOTONIC { + if clock_id != CLOCK_REALTIME && clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_BOOTTIME { return Err(Errno::EINVAL); } // Validate timespec diff --git a/crates/kernel/src/wasm_api.rs b/crates/kernel/src/wasm_api.rs index 79d210743..6117aa24f 100644 --- a/crates/kernel/src/wasm_api.rs +++ b/crates/kernel/src/wasm_api.rs @@ -9468,11 +9468,51 @@ pub extern "C" fn kernel_getitimer(which: u32, curr_ptr: *mut u8) -> i32 { // POSIX timers (timer_create / timer_settime / timer_gettime / etc.) // --------------------------------------------------------------------------- -/// SIGEV_SIGNAL = 0, SIGEV_NONE = 1. +/// SIGEV_SIGNAL = 0, SIGEV_NONE = 1, SIGEV_THREAD_ID = 4. const SIGEV_SIGNAL: u32 = 0; +const SIGEV_NONE: u32 = 1; +const SIGEV_THREAD_ID: u32 = 4; /// TIMER_ABSTIME flag for timer_settime. const TIMER_ABSTIME: i32 = 1; +fn timer_clock_to_host_clock(clock_id: u32) -> Option { + use wasm_posix_shared::clock::*; + match clock_id { + CLOCK_REALTIME | CLOCK_MONOTONIC => Some(clock_id), + CLOCK_BOOTTIME => Some(CLOCK_MONOTONIC), + _ => None, + } +} + +fn timer_notify_supported(sigev_notify: u32) -> bool { + matches!(sigev_notify, SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD_ID) +} + +#[cfg(test)] +mod posix_timer_tests { + use super::*; + use wasm_posix_shared::clock::*; + + #[test] + fn boottime_timers_use_monotonic_host_clock() { + assert_eq!(timer_clock_to_host_clock(CLOCK_BOOTTIME), Some(CLOCK_MONOTONIC)); + } + + #[test] + fn timer_create_rejects_unsupported_clock_ids() { + assert_eq!(timer_clock_to_host_clock(CLOCK_THREAD_CPUTIME_ID), None); + assert_eq!(timer_clock_to_host_clock(99), None); + } + + #[test] + fn timer_create_accepts_thread_id_notification() { + assert!(timer_notify_supported(SIGEV_SIGNAL)); + assert!(timer_notify_supported(SIGEV_NONE)); + assert!(timer_notify_supported(SIGEV_THREAD_ID)); + assert!(!timer_notify_supported(2)); + } +} + /// timer_create(clock_id, sigevent_ptr, timerid_ptr) /// musl sends ksigevent = {sigev_value(i32), sigev_signo(i32), sigev_notify(i32), sigev_tid(i32)} = 16 bytes. /// Returns 0 on success, negative errno. @@ -9486,19 +9526,27 @@ pub extern "C" fn kernel_timer_create( let (_gkl, proc) = unsafe { get_process() }; + let host_clock_id = match timer_clock_to_host_clock(clock_id) { + Some(id) => id, + None => return -(Errno::EINVAL as i32), + }; + // Parse sigevent (default: SIGEV_SIGNAL with SIGALRM) - let (sigev_signo, sigev_value, sigev_notify) = if sevp_ptr.is_null() { - (14u32, 0i32, SIGEV_SIGNAL) // default: SIGALRM + let (sigev_signo, sigev_value, sigev_notify, sigev_tid) = if sevp_ptr.is_null() { + (14u32, 0i32, SIGEV_SIGNAL, 0u32) // default: SIGALRM } else { let buf = unsafe { slice::from_raw_parts(sevp_ptr, 16) }; let value = i32::from_le_bytes(buf[0..4].try_into().unwrap()); let signo = i32::from_le_bytes(buf[4..8].try_into().unwrap()) as u32; let notify = i32::from_le_bytes(buf[8..12].try_into().unwrap()) as u32; - (signo, value, notify) + let tid = i32::from_le_bytes(buf[12..16].try_into().unwrap()) as u32; + (signo, value, notify, tid) }; - // Only SIGEV_SIGNAL and SIGEV_NONE are supported - if sigev_notify != SIGEV_SIGNAL && sigev_notify != 1 { + if !timer_notify_supported(sigev_notify) { + return -(Errno::EINVAL as i32); + } + if sigev_notify == SIGEV_THREAD_ID && !proc.is_main_thread(sigev_tid) { return -(Errno::EINVAL as i32); } @@ -9522,7 +9570,7 @@ pub extern "C" fn kernel_timer_create( }; proc.posix_timers[timer_id] = Some(PosixTimerState { - clock_id, + clock_id: host_clock_id, sigev_signo, sigev_value, interval_sec: 0, diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs index 2b0f12907..e1c5eee34 100644 --- a/crates/shared/src/lib.rs +++ b/crates/shared/src/lib.rs @@ -896,6 +896,7 @@ pub mod clock { pub const CLOCK_MONOTONIC: u32 = 1; pub const CLOCK_PROCESS_CPUTIME_ID: u32 = 2; pub const CLOCK_THREAD_CPUTIME_ID: u32 = 3; + pub const CLOCK_BOOTTIME: u32 = 7; } /// Timespec structure for the Wasm POSIX interface. diff --git a/docs/architecture.md b/docs/architecture.md index 75ef9aa0a..bc4682dce 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -316,7 +316,8 @@ Each process has a WebAssembly linear memory (shared, up to 1GB by default). The ``` Address Region 0x00000000 Wasm data segment (globals, static data) -0x00110000 Global base (--global-base=1114112) +0x00110000 Default global base (--global-base=1114112; raised by + the SDK when larger linker stack reservations require it) __heap_base First linker-free byte exported by the program control_base Host-owned low control slab - main page 0: fork-save/scratch diff --git a/docs/posix-status.md b/docs/posix-status.md index 26dd26d8d..e9843aa14 100644 --- a/docs/posix-status.md +++ b/docs/posix-status.md @@ -253,7 +253,7 @@ shortcuts. |----------|--------|-------| | `time()` | Full | Wrapper around clock_gettime(CLOCK_REALTIME). Returns seconds since epoch. | | `gettimeofday()` | Full | Wrapper around clock_gettime(CLOCK_REALTIME). Returns (sec, usec) pair. | -| `clock_gettime()` | Full | Host-delegated. CLOCK_REALTIME and CLOCK_MONOTONIC supported. Node.js uses Date.now() and process.hrtime.bigint(). | +| `clock_gettime()` | Full | Host-delegated. CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_PROCESS_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID, and CLOCK_BOOTTIME supported. CLOCK_BOOTTIME is monotonic-equivalent because Kandelo hosts cannot observe suspend time. Node.js uses Date.now() and process.hrtime.bigint(); browsers use Date.now() and performance.now(). | | `nanosleep()` | Partial | Host-delegated. Node.js uses Atomics.wait with timeout. Browser support requires a worker context that can block with Atomics.wait. Validates tv_sec >= 0 and tv_nsec < 1e9. | | `usleep()` | Full | Converts microseconds to sec+nsec, delegates to host_nanosleep. | | `clock_settime()` | Stub | Returns EPERM. Cannot set system clock from Wasm. | @@ -287,7 +287,7 @@ shortcuts. | `inotify_init()` / `inotify_init1()` | Stub | Returns ENOSYS. | | `inotify_add_watch()` / `inotify_rm_watch()` | Stub | Returns EBADF. | | `fanotify_init()` / `fanotify_mark()` | Stub | Returns ENOSYS. | -| `timer_create()` | Full | CLOCK_REALTIME and CLOCK_MONOTONIC. SIGEV_SIGNAL delivery with si_value. Per-process timer table (max 32). | +| `timer_create()` | Partial | CLOCK_REALTIME, CLOCK_MONOTONIC, and CLOCK_BOOTTIME. CLOCK_BOOTTIME is monotonic-equivalent. SIGEV_SIGNAL, SIGEV_NONE, and current-main-thread SIGEV_THREAD_ID are supported; SIGEV_THREAD is not supported. Per-process timer table (max 32). | | `timer_settime()` / `timer_gettime()` | Full | Absolute (TIMER_ABSTIME) and relative time. Interval timers with automatic rearming. Host setTimeout-based delivery. | | `timer_getoverrun()` | Full | Tracks overrun count when signal is still pending at next interval fire. Reset on successful signal delivery. | | `timer_delete()` | Full | Cancels timer and removes from per-process table. | diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index d4d8e7bad..ca0d8aad0 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -162,7 +162,8 @@ wasm32posix-cc -shared -fPIC plugin.c -o plugin.so -Wl,--import-memory # Memory provided by host -Wl,--shared-memory # Enable SharedArrayBuffer -Wl,--max-memory=1073741824 # 1GB max memory --Wl,--global-base=1114112 # Data segment start +-Wl,--global-base= # Data segment start; defaults to 1114112 + # and is raised for larger -z stack-size -Wl,--allow-undefined # Host imports are resolved at load time -Wl,--export-table # Export function table (for dlopen) -Wl,--export=__stack_pointer # Required for fork/thread support diff --git a/host/src/browser-kernel-host.ts b/host/src/browser-kernel-host.ts index 083b5065f..81bdb6e82 100644 --- a/host/src/browser-kernel-host.ts +++ b/host/src/browser-kernel-host.ts @@ -85,6 +85,10 @@ export interface BrowserKernelOptions { syscallLogPtrWidth?: 4 | 8; /** Forwarded to TlsNetworkBackendOptions.dnsAliases. */ dnsAliases?: Record; + /** Forwarded to TlsNetworkBackendOptions.corsProxyUrl. Browser pages that + * are not controlled by Kandelo's service worker can use this to route + * guest outbound HTTP(S) through a same-origin proxy. */ + corsProxyUrl?: string; } /** Options for {@link BrowserKernel.boot}. */ @@ -395,6 +399,7 @@ export class BrowserKernel { enableSyscallLog: this.options.enableSyscallLog, syscallLogPtrWidth: this.options.syscallLogPtrWidth, dnsAliases: this.options.dnsAliases, + corsProxyUrl: this.options.corsProxyUrl, }, }; this.kernelWorkerHandle.postMessage(initMsg, [transferBuf]); diff --git a/host/src/browser-kernel-protocol.ts b/host/src/browser-kernel-protocol.ts index ce9521b5f..cf87fc4d0 100644 --- a/host/src/browser-kernel-protocol.ts +++ b/host/src/browser-kernel-protocol.ts @@ -59,6 +59,10 @@ export interface InitMessage { syscallLogPtrWidth?: 4 | 8; /** Forwarded to TlsNetworkBackendOptions.dnsAliases. */ dnsAliases?: Record; + /** Forwarded to TlsNetworkBackendOptions.corsProxyUrl for browser fetch + * backends that need a same-origin proxy to reach external HTTP(S) + * hosts. */ + corsProxyUrl?: string; }; } diff --git a/host/src/browser-kernel-worker-entry.ts b/host/src/browser-kernel-worker-entry.ts index f2414e1b9..5121755cc 100644 --- a/host/src/browser-kernel-worker-entry.ts +++ b/host/src/browser-kernel-worker-entry.ts @@ -142,6 +142,7 @@ interface ProcessInfo { } const processes = new Map(); const processTeardowns = new Map>(); +const vmInterruptTimers = new Map>(); // Includes standalone thread-worker teardown promises that may outlive the // process map entry they came from. const workerTeardowns = new Set>(); @@ -211,6 +212,41 @@ const threadWorkers = new Map(); const threadExits = new ThreadExitCoordinator(); const reportedNonzeroProcessExits = new Set(); +function clearVmInterruptTimer(pid: number): void { + const timer = vmInterruptTimers.get(pid); + if (timer) clearTimeout(timer); + vmInterruptTimers.delete(pid); +} + +function handleVmInterruptTimer(msg: { + pid: number; + timedOutPtr: number; + vmInterruptPtr: number; + seconds: number; +}): void { + clearVmInterruptTimer(msg.pid); + if (!(msg.seconds > 0)) return; + const requestedDelayMs = Math.min(msg.seconds, 999999999) * 1000; + // The process worker can be stuck in a CPU-bound Wasm loop, so a timer in + // that worker cannot set cooperative runtime interrupt flags. Run the timer + // from this kernel worker instead; the process memory is shared, matching + // the Node host's VM-interrupt timer path. + const delayMs = Math.max(1, requestedDelayMs - 100); + const timer = setTimeout(() => { + vmInterruptTimers.delete(msg.pid); + const info = processes.get(msg.pid); + if (!info) return; + const flags = new Uint8Array(info.memory.buffer); + if (msg.timedOutPtr >= 0 && msg.timedOutPtr < flags.length) { + Atomics.store(flags, msg.timedOutPtr, 1); + } + if (msg.vmInterruptPtr >= 0 && msg.vmInterruptPtr < flags.length) { + Atomics.store(flags, msg.vmInterruptPtr, 1); + } + }, delayMs); + vmInterruptTimers.set(msg.pid, timer); +} + function delay(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } @@ -245,7 +281,6 @@ function reportNonzeroProcessExitDiagnostic( (serviceLog ? `\n${serviceLog}` : "") + `\n${syscalls}`; console.warn(diagnostic); - post({ type: "stderr", pid, data: new TextEncoder().encode(`${diagnostic}\n`) }); } function readServiceLogForProcess(argv: readonly string[] | undefined): string | null { @@ -686,6 +721,7 @@ async function handleInit(msg: Extract) { // production, keeping the browser networking path identical across modes. const tlsBackend = new TlsNetworkBackend({ dnsAliases: msg.config.dnsAliases, + corsProxyUrl: msg.config.corsProxyUrl, }); await tlsBackend.init(); io.network = tlsBackend; @@ -1013,7 +1049,7 @@ function installProcessWorkerListeners( finalize(signalExitStatus(SIGSEGV), "worker exit event", SIGSEGV); }); worker.on("message", (msg: unknown) => { - const m = msg as { type?: string; message?: string; pid?: number; status?: number }; + const m = msg as WorkerToHostMessage; if (m.type === "error") { console.error(`[kernel-worker] Process error pid=${pid}:`, m.message); // Forward to host stderr so the demo log shows the actual failure @@ -1030,6 +1066,8 @@ function installProcessWorkerListeners( finalize(classifiedTrapExitStatus(m.message) ?? -1, "worker-main error message", signum); } else if (m.type === "exit") { finalize(m.status ?? 0, "worker-main exit message"); + } else if (m.type === "vm_interrupt_timer") { + handleVmInterruptTimer(m); } }); } @@ -1122,6 +1160,13 @@ async function handleExec( if (!resolved) return -2; // ENOENT if ("errno" in resolved) return -resolved.errno; const { programBytes: bytes, argv: launchArgv } = resolved; + let programModule: WebAssembly.Module; + try { + programModule = await WebAssembly.compile(bytes); + } catch (e) { + if (e instanceof WebAssembly.CompileError) return -8; // ENOEXEC + throw e; + } // Program found — run kernel exec setup const setupResult = kernelWorker.kernelExecSetup(pid); @@ -1135,6 +1180,7 @@ async function handleExec( // crash detector and tear down the kernel's view of the still-alive // (post-exec) process. const oldInfo = processes.get(pid); + clearVmInterruptTimer(pid); if (oldInfo?.worker) { intentionallyTerminated.add(oldInfo.worker as object); await oldInfo.worker.terminate().catch(() => {}); @@ -1178,6 +1224,7 @@ async function handleExec( pid, ppid: 0, programBytes: bytes, + programModule, memory: newMemory, channelOffset: newChannelOffset, argv: launchArgv, @@ -1194,6 +1241,7 @@ async function handleExec( processes.set(pid, { memory: newMemory, programBytes: bytes, + programModule, worker: newWorker, argv: launchArgv, channelOffset: newChannelOffset, @@ -1258,6 +1306,13 @@ async function handlePosixSpawn( await waitForProcessTeardowns(); post({ type: "proc_event", kind: "spawn", pid: childPid }); + let programModule: WebAssembly.Module; + try { + programModule = await WebAssembly.compile(programBytes); + } catch (e) { + if (e instanceof WebAssembly.CompileError) return -8; // ENOEXEC + throw e; + } const ptrWidth = detectPtrWidth(programBytes); const { @@ -1285,6 +1340,7 @@ async function handlePosixSpawn( pid: childPid, ppid: 0, programBytes, + programModule, memory: newMemory, channelOffset: newChannelOffset, argv, @@ -1298,6 +1354,7 @@ async function handlePosixSpawn( processes.set(childPid, { memory: newMemory, programBytes, + programModule, worker: newWorker, argv, channelOffset: newChannelOffset, @@ -1423,6 +1480,8 @@ async function handleClone( // worker-main posted {type:"error"} — instantiation failure, top-level // throw, etc. Without this the parent's pthread_join blocks forever. failThread((m as { message?: string }).message ?? "thread error"); + } else if (m.type === "vm_interrupt_timer") { + handleVmInterruptTimer(m); } }); threadWorker.on("error", (err: Error) => { @@ -1450,6 +1509,7 @@ async function finishProcessExit( exitStatus: number, crashSignum: number = signalFromExitStatus(exitStatus) ?? SIGSEGV, ): Promise { + clearVmInterruptTimer(pid); if (processTeardowns.has(pid)) return; const info = processes.get(pid); @@ -1510,6 +1570,7 @@ async function finishProcessExit( async function handleTerminateProcess(msg: Extract) { const pid = msg.pid; + clearVmInterruptTimer(pid); // Terminate thread workers const threads = threadWorkers.get(pid); @@ -1684,6 +1745,8 @@ async function handleDestroy(msg: Extract; /** Multi-module fork support for side modules loaded into this process. */ sideModuleFork?: SideModuleForkSupport; + /** Process-wide wasm SjLj tag for env.__c_longjmp, shared with the main module. */ + longjmpTag?: WebAssembly.ExportValue; /** Callback to locate and read a library file by name (async version) */ resolveLibrary?: (name: string) => Promise; /** Callback to locate and read a library file by name (sync version) */ @@ -365,14 +367,13 @@ function instantiateSharedLibrary( }; // Tag imported by side modules compiled with clang's wasm SjLj lowering - // (`-mllvm -wasm-enable-sjlj`). The host doesn't actually catch these — the - // main process either has its own __c_longjmp tag (LLVM 22) or doesn't use - // SjLj (LLVM 21). A stub Tag lets the side module's import type-check and - // instantiate; behavior at throw time is undefined but the side module - // typically never throws this tag itself. - const longjmpTag = (typeof (WebAssembly as any).Tag === "function") + // (`-mllvm -wasm-enable-sjlj`). Wasm exception matching compares tag identity, + // so side modules loaded into a process must share the main module's tag. + // Standalone linker tests and embedders without a main-module tag still get a + // local fallback so tag-importing side modules can instantiate. + const longjmpTag = options.longjmpTag ?? ((typeof (WebAssembly as any).Tag === "function") ? new (WebAssembly as any).Tag({ parameters: ["i32"] }) - : undefined; + : undefined); const module = new WebAssembly.Module(wasmBytes as unknown as BufferSource); const moduleImports = WebAssembly.Module.imports(module); diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index 92f447155..81073e0e7 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -166,9 +166,14 @@ const SIGCHLD = 17; const SIGALRM = 14; /** Network ioctl request codes */ +const SIOCGIFNAME = 0x8910; const SIOCGIFCONF = 0x8912; const SIOCGIFHWADDR = 0x8927; const SIOCGIFADDR = 0x8915; +const SIOCGIFINDEX = 0x8933; +const VIRTUAL_IFACE_NAME = "eth0"; +const VIRTUAL_IFACE_INDEX = 1; +const ENODEV = 19; /** Ioctl syscall number */ const SYS_IOCTL = ABI_SYSCALLS.Ioctl; @@ -790,6 +795,8 @@ export class CentralizedKernelWorker { /** Date.now() deadline for finite-timeout poll/ppoll retries, or -1. */ deadline?: number; }>(); + /** Absolute finite poll/ppoll deadlines that must survive EAGAIN retry wakes. */ + private pollRetryDeadlines = new Map(); /** Pending pselect6/select retries — keyed by channelOffset for per-thread tracking */ private pendingSelectRetries = new Map 0 ? Date.now() + timeoutMs : -1; + const retryKey = channel.channelOffset; + let deadline = -1; + if (timeoutMs > 0) { + const now = Date.now(); + const existingDeadline = this.pollRetryDeadlines.get(retryKey); + deadline = existingDeadline ?? (now + timeoutMs); + if (deadline <= now) { + this.pollRetryDeadlines.delete(retryKey); + this.completeChannel(channel, syscallNr, origArgs, SYSCALL_ARGS[syscallNr], 0, 0); + return; + } + if (existingDeadline === undefined) { + this.pollRetryDeadlines.set(retryKey, deadline); + } + } else { + this.pollRetryDeadlines.delete(retryKey); + } const retryFn = () => { - this.pendingPollRetries.delete(channel.channelOffset); + this.pendingPollRetries.delete(retryKey); if (!this.processes.has(channel.pid)) return; // Check deadline for finite timeout if (deadline > 0 && Date.now() >= deadline) { + this.pollRetryDeadlines.delete(retryKey); this.completeChannel(channel, syscallNr, origArgs, SYSCALL_ARGS[syscallNr], 0, 0); return; } @@ -4911,8 +4948,8 @@ export class CentralizedKernelWorker { } // ---- Network interface ioctl host-side handlers ---- - // The kernel has a single virtual network interface ("eth0") with a random - // MAC address generated per kernel instance. + // The kernel has a single virtual network interface ("eth0") at ifindex 1 + // with a random MAC address generated per kernel instance. /** * Handle SIOCGIFCONF: enumerate network interfaces. @@ -4942,8 +4979,8 @@ export class CentralizedKernelWorker { const SIZEOF_IFREQ = 32; if (ifcLen >= SIZEOF_IFREQ && ifcBuf !== 0) { - // Write one ifreq entry for "eth0" into process memory at ifc_buf - const nameBytes = new TextEncoder().encode("eth0"); + // Write one ifreq entry for the virtual interface into process memory at ifc_buf + const nameBytes = new TextEncoder().encode(VIRTUAL_IFACE_NAME); processMem.set(nameBytes, ifcBuf); processMem.fill(0, ifcBuf + nameBytes.length, ifcBuf + 16); // pad ifr_name @@ -4966,6 +5003,30 @@ export class CentralizedKernelWorker { this.relistenChannel(channel); } + /** + * Handle SIOCGIFNAME: map an interface index to its name. + * struct ifreq at arg[2]: ifr_name[16] + union; ifr_ifindex lives at +16. + */ + private handleIoctlIfname(channel: ChannelInfo, origArgs: number[]): void { + const processView = new DataView(channel.memory.buffer); + const processMem = new Uint8Array(channel.memory.buffer); + const ifreqPtr = origArgs[2]; + const ifindex = processView.getInt32(ifreqPtr + 16, true); + + if (ifindex !== VIRTUAL_IFACE_INDEX) { + this.completeChannelRaw(channel, -ENODEV, ENODEV); + this.relistenChannel(channel); + return; + } + + const nameBytes = new TextEncoder().encode(VIRTUAL_IFACE_NAME); + processMem.set(nameBytes, ifreqPtr); + processMem.fill(0, ifreqPtr + nameBytes.length, ifreqPtr + 16); + + this.completeChannelRaw(channel, 0, 0); + this.relistenChannel(channel); + } + /** * Handle SIOCGIFHWADDR: get hardware (MAC) address for an interface. * struct ifreq at arg[2]: ifr_name[16] + ifr_hwaddr (struct sockaddr, 16 bytes) @@ -5009,6 +5070,33 @@ export class CentralizedKernelWorker { this.relistenChannel(channel); } + /** + * Handle SIOCGIFINDEX: map an interface name to its index. + * struct ifreq at arg[2]: ifr_name[16] + union; ifr_ifindex lives at +16. + */ + private handleIoctlIfindex(channel: ChannelInfo, origArgs: number[]): void { + const processView = new DataView(channel.memory.buffer); + const processMem = new Uint8Array(channel.memory.buffer); + const ifreqPtr = origArgs[2]; + const nul = processMem.indexOf(0, ifreqPtr); + const end = nul >= ifreqPtr && nul < ifreqPtr + 16 ? nul : ifreqPtr + 16; + // Browser TextDecoder rejects SharedArrayBuffer-backed views. Copy the + // guest ifr_name bytes before decoding; ioctl handlers must work for + // process memories backed by SAB. + const name = new TextDecoder().decode(new Uint8Array(processMem.subarray(ifreqPtr, end))); + + if (name !== VIRTUAL_IFACE_NAME) { + this.completeChannelRaw(channel, -ENODEV, ENODEV); + this.relistenChannel(channel); + return; + } + + processView.setInt32(ifreqPtr + 16, VIRTUAL_IFACE_INDEX, true); + + this.completeChannelRaw(channel, 0, 0); + this.relistenChannel(channel); + } + private handleWritev(channel: ChannelInfo, syscallNr: number, origArgs: number[]): void { const fd = origArgs[0]; const iovPtr = origArgs[1]; diff --git a/host/src/networking/fetch-backend.ts b/host/src/networking/fetch-backend.ts index 07bd51881..2012aed7b 100644 --- a/host/src/networking/fetch-backend.ts +++ b/host/src/networking/fetch-backend.ts @@ -46,7 +46,10 @@ export function parseNumericIpv4Hostname(hostname: string): Uint8Array | null { ]); } -export function validateSyntheticDnsHostname(hostname: string): void { +export function validateSyntheticDnsHostname( + hostname: string, + aliases?: Record, +): void { // The browser backends synthesize addresses for DNS names and let fetch() // perform the real network lookup later. Do not synthesize addresses for // names that a POSIX resolver would reject before DNS, such as empty labels @@ -60,12 +63,33 @@ export function validateSyntheticDnsHostname(hostname: string): void { throw nameNotFoundError(hostname); } } + + // Browser DNS is synthetic: there is no native getaddrinfo(3) API, so the + // HTTP/TLS backends can only safely mint addresses for names that may later + // be resolved by fetch() or by an explicit backend alias. Do not report + // success for names the resolver must know are absent. The special-use + // ".invalid" zone is reserved to never resolve, and unqualified names have + // no search domain in Kandelo's browser environment. Returning a synthetic + // address for those names makes POSIX applications block in connect/recv + // instead of getting the expected getaddrinfo failure. + if (aliases && Object.prototype.hasOwnProperty.call(aliases, absoluteName)) { + return; + } + const lowerName = absoluteName.toLowerCase(); + if (aliases && Object.prototype.hasOwnProperty.call(aliases, lowerName)) { + return; + } + if (lowerName === "localhost") return; + if (!lowerName.includes(".") || lowerName === "invalid" || lowerName.endsWith(".invalid")) { + throw nameNotFoundError(hostname); + } } const POLLIN = 0x0001; const POLLOUT = 0x0004; const POLLERR = 0x0008; const POLLHUP = 0x0010; +const MSG_PEEK = 0x0002; interface ConnectionState { hostname: string; @@ -227,7 +251,7 @@ export class FetchNetworkBackend implements NetworkIO { return data.length; } - recv(handle: number, maxLen: number, _flags: number): Uint8Array { + recv(handle: number, maxLen: number, flags: number): Uint8Array { const conn = this.connections.get(handle); if (!conn) throw new Error("ENOTCONN"); @@ -248,7 +272,9 @@ export class FetchNetworkBackend implements NetworkIO { if (len === 0) return new Uint8Array(0); const result = conn.responseBuf.slice(conn.responseOffset, conn.responseOffset + len); - conn.responseOffset += len; + if ((flags & MSG_PEEK) === 0) { + conn.responseOffset += len; + } return result; } @@ -285,7 +311,7 @@ export class FetchNetworkBackend implements NetworkIO { getaddrinfo(hostname: string): Uint8Array { const literalIp = parseNumericIpv4Hostname(hostname); if (literalIp) return literalIp; - validateSyntheticDnsHostname(hostname); + validateSyntheticDnsHostname(hostname, this.options.hostAliases); // In the browser, return a synthetic IP. // The actual connection uses the Host header, not this IP. diff --git a/host/src/networking/tcp-backend.ts b/host/src/networking/tcp-backend.ts index 637af9ea3..89b8ea437 100644 --- a/host/src/networking/tcp-backend.ts +++ b/host/src/networking/tcp-backend.ts @@ -7,6 +7,7 @@ const POLLIN = 0x0001; const POLLOUT = 0x0004; const POLLERR = 0x0008; const POLLHUP = 0x0010; +const MSG_PEEK = 0x0002; /** * Map a Node.js network error code to a POSIX errno value. @@ -122,7 +123,7 @@ export class TcpNetworkBackend implements NetworkIO { return data.length; } - recv(handle: number, maxLen: number, _flags: number): Uint8Array { + recv(handle: number, maxLen: number, flags: number): Uint8Array { const conn = this.connections.get(handle); if (!conn) throw new Error("ENOTCONN"); if (conn.error) throw conn.error; @@ -134,7 +135,9 @@ export class TcpNetworkBackend implements NetworkIO { conn.recvBuf.byteOffset, len, ); - conn.recvBuf = conn.recvBuf.subarray(len); + if ((flags & MSG_PEEK) === 0) { + conn.recvBuf = conn.recvBuf.subarray(len); + } return result; } diff --git a/host/src/networking/tls-network-backend.ts b/host/src/networking/tls-network-backend.ts index 9a8745ba0..68ec5f399 100644 --- a/host/src/networking/tls-network-backend.ts +++ b/host/src/networking/tls-network-backend.ts @@ -22,6 +22,12 @@ import { type GeneratedCertificate, } from "../../../packages/registry/openssl/src/tls/certificates"; +const POLLIN = 0x0001; +const POLLOUT = 0x0004; +const POLLERR = 0x0008; +const POLLHUP = 0x0010; +const MSG_PEEK = 0x0002; + // ------------------------------------------------------------------ types interface HttpConnectionState { @@ -136,6 +142,17 @@ function formatHttpResponse( return result; } +function headersFromRawHeaderString(rawHeaders: string): Headers { + const headers = new Headers(); + for (const line of rawHeaders.split(/\r?\n/)) { + if (!line) continue; + const colon = line.indexOf(":"); + if (colon <= 0) continue; + headers.append(line.slice(0, colon).trim(), line.slice(colon + 1).trim()); + } + return headers; +} + function corsProxyFetchUrl(corsProxyUrl: string, targetUrl: string): string { if (targetUrl.startsWith(corsProxyUrl)) { return targetUrl; @@ -210,7 +227,7 @@ export class TlsNetworkBackend implements NetworkIO { getaddrinfo(hostname: string): Uint8Array { const literalIp = parseNumericIpv4Hostname(hostname); if (literalIp) return literalIp; - validateSyntheticDnsHostname(hostname); + validateSyntheticDnsHostname(hostname, this.dnsAliases); const ip = this.syntheticIp(hostname); const ipStr = this.ipKey(ip); @@ -253,14 +270,14 @@ export class TlsNetworkBackend implements NetworkIO { return this.httpSend(conn, data); } - recv(handle: number, maxLen: number, _flags: number): Uint8Array { + recv(handle: number, maxLen: number, flags: number): Uint8Array { const conn = this.connections.get(handle); if (!conn) throw new Error("ENOTCONN"); if (conn.kind === "tls") { - return this.tlsRecv(conn, maxLen); + return this.tlsRecv(conn, maxLen, flags); } - return this.httpRecv(conn, maxLen); + return this.httpRecv(conn, maxLen, flags); } close(handle: number): void { @@ -394,14 +411,16 @@ export class TlsNetworkBackend implements NetworkIO { return data.length; } - private tlsRecv(conn: TlsConnectionState, maxLen: number): Uint8Array { + private tlsRecv(conn: TlsConnectionState, maxLen: number, flags: number): Uint8Array { if (conn.error) throw conn.error; // Check if we have encrypted data buffered from the TLS engine if (conn.clientDownstreamBuf.length > 0) { const n = Math.min(maxLen, conn.clientDownstreamBuf.length); const result = conn.clientDownstreamBuf.slice(0, n); - conn.clientDownstreamBuf = conn.clientDownstreamBuf.subarray(n); + if ((flags & MSG_PEEK) === 0) { + conn.clientDownstreamBuf = conn.clientDownstreamBuf.subarray(n); + } return result; } @@ -546,6 +565,32 @@ export class TlsNetworkBackend implements NetworkIO { const fetchBody: Uint8Array | undefined = body && body.length > 0 ? new Uint8Array(body) as Uint8Array : undefined; + const isWindowContext = typeof document !== "undefined"; + if (typeof XMLHttpRequest !== "undefined" && !isWindowContext) { + try { + const xhr = new XMLHttpRequest(); + xhr.open(method, url, false); + xhr.responseType = "arraybuffer"; + for (const [key, value] of fetchHeaders) { + xhr.setRequestHeader(key, value); + } + xhr.send(fetchBody); + conn.responseBuf = formatHttpResponse( + xhr.status, + xhr.statusText, + headersFromRawHeaderString(xhr.getAllResponseHeaders()), + xhr.response ?? new ArrayBuffer(0), + ); + conn.fetchDone = true; + conn.sendBuf = new Uint8Array(0); + return data.length; + } catch { + // Fall through to fetch when synchronous XHR is unavailable or rejected + // by the current browser context. Dedicated workers use the blocking + // path to match POSIX socket reads; other contexts retain async fetch. + } + } + const doFetch = async () => { try { const response = await fetch(url, { @@ -594,7 +639,7 @@ export class TlsNetworkBackend implements NetworkIO { return data.length; } - private httpRecv(conn: HttpConnectionState, maxLen: number): Uint8Array { + private httpRecv(conn: HttpConnectionState, maxLen: number, flags: number): Uint8Array { if (!conn.fetchDone) { throw new EagainError(); } @@ -610,10 +655,50 @@ export class TlsNetworkBackend implements NetworkIO { if (len === 0) return new Uint8Array(0); const result = conn.responseBuf.slice(conn.responseOffset, conn.responseOffset + len); - conn.responseOffset += len; + if ((flags & MSG_PEEK) === 0) { + conn.responseOffset += len; + } return result; } + poll(handle: number, events: number): number { + const conn = this.connections.get(handle); + if (!conn) throw Object.assign(new Error("ENOTCONN"), { errno: 107 }); + + let revents = 0; + if ((events & POLLOUT) !== 0 && (conn.kind === "http" || !conn.closed)) { + revents |= POLLOUT; + } + + if (conn.kind === "http") { + if (conn.fetchError) return revents | POLLERR; + if ( + (events & POLLIN) !== 0 && + conn.responseBuf && + conn.responseOffset < conn.responseBuf.length + ) { + revents |= POLLIN; + } + if ( + conn.fetchDone && + conn.responseBuf && + conn.responseOffset >= conn.responseBuf.length + ) { + revents |= POLLHUP; + } + return revents; + } + + if (conn.error) return revents | POLLERR; + if ((events & POLLIN) !== 0 && conn.clientDownstreamBuf.length > 0) { + revents |= POLLIN; + } + if (conn.closed && conn.clientDownstreamBuf.length === 0) { + revents |= POLLHUP; + } + return revents; + } + // ---- Utilities ---- private syntheticIp(hostname: string): Uint8Array { diff --git a/host/src/networking/virtual-network.ts b/host/src/networking/virtual-network.ts index 68560eeb7..31fce8c1b 100644 --- a/host/src/networking/virtual-network.ts +++ b/host/src/networking/virtual-network.ts @@ -20,6 +20,7 @@ const POLLIN = 0x0001; const POLLOUT = 0x0004; const POLLERR = 0x0008; const POLLHUP = 0x0010; +const MSG_PEEK = 0x0002; const ANY = "0.0.0.0"; @@ -77,7 +78,7 @@ class VirtualTcpPeer implements TcpConnectionPeer { return data.length; } - recv(maxLen: number, _flags: number): Uint8Array { + recv(maxLen: number, flags: number): Uint8Array { if (this.reset) { const err = new Error("ECONNRESET") as Error & { errno?: number }; err.errno = ECONNRESET; @@ -86,7 +87,9 @@ class VirtualTcpPeer implements TcpConnectionPeer { if (this.recvBuf.length > 0) { const len = Math.min(maxLen, this.recvBuf.length); const out = this.recvBuf.slice(0, len); - this.recvBuf = this.recvBuf.slice(len); + if ((flags & MSG_PEEK) === 0) { + this.recvBuf = this.recvBuf.slice(len); + } return out; } if (!this.peer || this.peer.writeClosed) { diff --git a/host/src/node-kernel-host.ts b/host/src/node-kernel-host.ts index 16c713bab..822587e49 100644 --- a/host/src/node-kernel-host.ts +++ b/host/src/node-kernel-host.ts @@ -84,7 +84,15 @@ export interface NodeKernelHostOptions { * to a VFS-only world yet. */ rootfsImage?: "default" | ArrayBuffer | Uint8Array; - extraMounts?: Array<{ mountPoint: string; hostPath: string; readonly?: boolean }>; + extraMounts?: Array<{ + mountPoint: string; + hostPath: string; + readonly?: boolean; + /** Virtual owner for existing host-backed mount entries. Defaults to root. */ + uid?: number; + /** Virtual group for existing host-backed mount entries. Defaults to root. */ + gid?: number; + }>; } export interface SpawnOptions { diff --git a/host/src/node-kernel-protocol.ts b/host/src/node-kernel-protocol.ts index d40fdfa21..aa7aeed4f 100644 --- a/host/src/node-kernel-protocol.ts +++ b/host/src/node-kernel-protocol.ts @@ -37,7 +37,13 @@ export interface InitMessage { * (custom-io / legacy path). */ rootfsImage?: ArrayBuffer; - extraMounts?: Array<{ mountPoint: string; hostPath: string; readonly?: boolean }>; + extraMounts?: Array<{ + mountPoint: string; + hostPath: string; + readonly?: boolean; + uid?: number; + gid?: number; + }>; /** Attach a real-TCP backend (TcpNetworkBackend) to the worker's PlatformIO * so wasm programs can dial external hosts via Node `net.Socket`. */ enableTcpNetwork?: boolean; diff --git a/host/src/node-kernel-worker-entry.ts b/host/src/node-kernel-worker-entry.ts index 4bd3c824f..6b461517b 100644 --- a/host/src/node-kernel-worker-entry.ts +++ b/host/src/node-kernel-worker-entry.ts @@ -105,6 +105,7 @@ interface ProcessInfo { } const processes = new Map(); const processTeardowns = new Map>(); +const vmInterruptTimers = new Map>(); const reportedExits = new Set(); const compiledProgramModules = new Map>(); const MAX_COMPILED_PROGRAM_MODULES = 16; @@ -184,6 +185,37 @@ function reportProcessExit(pid: number, status: number): void { post({ type: "exit", pid, status }); } +function clearVmInterruptTimer(pid: number): void { + const timer = vmInterruptTimers.get(pid); + if (timer) clearTimeout(timer); + vmInterruptTimers.delete(pid); +} + +function handleVmInterruptTimer(msg: { + pid: number; + timedOutPtr: number; + vmInterruptPtr: number; + seconds: number; +}): void { + clearVmInterruptTimer(msg.pid); + if (!(msg.seconds > 0)) return; + const requestedDelayMs = Math.min(msg.seconds, 999999999) * 1000; + const delayMs = Math.max(1, requestedDelayMs - 100); + const timer = setTimeout(() => { + vmInterruptTimers.delete(msg.pid); + const info = processes.get(msg.pid); + if (!info) return; + const flags = new Uint8Array(info.memory.buffer); + if (msg.timedOutPtr >= 0 && msg.timedOutPtr < flags.length) { + Atomics.store(flags, msg.timedOutPtr, 1); + } + if (msg.vmInterruptPtr >= 0 && msg.vmInterruptPtr < flags.length) { + Atomics.store(flags, msg.vmInterruptPtr, 1); + } + }, delayMs); + vmInterruptTimers.set(msg.pid, timer); +} + function signalFromExitStatus(exitStatus: number): number | null { return exitStatus >= 128 ? (exitStatus - 128) & 0x7f : null; } @@ -218,6 +250,7 @@ async function finalizeProcessWorker( exitStatus: number, crashSignum: number = signalFromExitStatus(exitStatus) ?? SIGSEGV, ): Promise { + clearVmInterruptTimer(pid); const cur = processes.get(pid); if (cur && cur.worker === worker) { // Synthesize a signal-style reap *before* `deactivateProcess` in @@ -524,7 +557,13 @@ async function resolveExecutableForLaunch( */ function buildVirtualPlatformIO( rootfsImage: ArrayBuffer, - extraMounts?: Array<{ mountPoint: string; hostPath: string; readonly?: boolean }>, + extraMounts?: Array<{ + mountPoint: string; + hostPath: string; + readonly?: boolean; + uid?: number; + gid?: number; + }>, ): VirtualPlatformIO { sessionDir = mkdtempSync(join(tmpdir(), "wasm-posix-session-")); const specMounts = resolveForNode( @@ -537,7 +576,10 @@ function buildVirtualPlatformIO( shmfs.chmod("/", 0o1777); const extras: MountConfig[] = (extraMounts ?? []).map((m) => ({ mountPoint: m.mountPoint, - backend: new HostFileSystem(m.hostPath), + backend: new HostFileSystem(m.hostPath, m.mountPoint, { + uid: m.uid, + gid: m.gid, + }), readonly: m.readonly, })); const mounts = [ @@ -724,7 +766,7 @@ function handleSpawn(msg: SpawnMessage) { // — so surface them to stderr and synthesize an exit so the host's // exitResolver fires with a non-zero status. worker.on("message", (raw: unknown) => { - const m = raw as { type: string; pid?: number; message?: string; status?: number }; + const m = raw as WorkerToHostMessage; if (m.type === "error" && m.pid === pid) { finalizeProcessWorkerError(pid, worker, m.message); } else if (m.type === "exit" && m.pid === pid) { @@ -733,6 +775,8 @@ function handleSpawn(msg: SpawnMessage) { // the kernel didn't process a SYS_exit_group first, the kernel // still has the process registered and host.spawn() would hang. void finalizeProcessWorker(pid, worker, m.status ?? 0); + } else if (m.type === "vm_interrupt_timer" && m.pid === pid) { + handleVmInterruptTimer(m); } }); @@ -818,11 +862,13 @@ async function handleFork( childWorker.on("error", (err: Error) => finalizeUnexpectedWorkerError(childPid, childWorker, "worker error", err)); childWorker.on("message", (raw: unknown) => { - const m = raw as { type: string; pid?: number; message?: string; status?: number }; + const m = raw as WorkerToHostMessage; if (m.type === "error" && m.pid === childPid) { finalizeProcessWorkerError(childPid, childWorker, m.message); } else if (m.type === "exit" && m.pid === childPid) { void finalizeProcessWorker(childPid, childWorker, m.status ?? 0); + } else if (m.type === "vm_interrupt_timer" && m.pid === childPid) { + handleVmInterruptTimer(m); } }); @@ -841,6 +887,13 @@ async function handleExec( if (!resolved) return -2; // ENOENT if ("errno" in resolved) return -resolved.errno; const { programBytes, argv: launchArgv } = resolved; + let programModule: WebAssembly.Module; + try { + programModule = await getCompiledProgramModule(programBytes); + } catch (e) { + if (e instanceof WebAssembly.CompileError) return -8; // ENOEXEC + throw e; + } const newPtrWidth = detectPtrWidth(programBytes); const setupResult = kernelWorker.kernelExecSetup(pid); @@ -849,6 +902,7 @@ async function handleExec( kernelWorker.prepareProcessForExec(pid); const oldInfo = processes.get(pid); + clearVmInterruptTimer(pid); if (oldInfo?.worker) { intentionallyTerminated.add(oldInfo.worker as object); await oldInfo.worker.terminate().catch(() => {}); @@ -881,6 +935,7 @@ async function handleExec( pid, ppid: 0, programBytes, + programModule, memory: newMemory, channelOffset: newChannelOffset, argv: launchArgv, @@ -893,6 +948,7 @@ async function handleExec( processes.set(pid, { memory: newMemory, programBytes, + programModule, worker: newWorker, channelOffset: newChannelOffset, ptrWidth: newPtrWidth, @@ -906,11 +962,13 @@ async function handleExec( // uncaught wasm traps) so the host learns the process died — same // wiring as handleSpawn. newWorker.on("message", (raw: unknown) => { - const m = raw as { type: string; pid?: number; message?: string; status?: number }; + const m = raw as WorkerToHostMessage; if (m.type === "error" && m.pid === pid) { finalizeProcessWorkerError(pid, newWorker, m.message); } else if (m.type === "exit" && m.pid === pid) { void finalizeProcessWorker(pid, newWorker, m.status ?? 0); + } else if (m.type === "vm_interrupt_timer" && m.pid === pid) { + handleVmInterruptTimer(m); } }); @@ -966,6 +1024,13 @@ async function handlePosixSpawn( envp: string[], ): Promise { post({ type: "proc_event", kind: "spawn", pid: childPid }); + let programModule: WebAssembly.Module; + try { + programModule = await getCompiledProgramModule(programBytes); + } catch (e) { + if (e instanceof WebAssembly.CompileError) return -8; // ENOEXEC + throw e; + } const ptrWidth = detectPtrWidth(programBytes); const { @@ -990,6 +1055,7 @@ async function handlePosixSpawn( pid: childPid, ppid: 0, programBytes, + programModule, memory, channelOffset, argv, @@ -1002,6 +1068,7 @@ async function handlePosixSpawn( processes.set(childPid, { memory, programBytes, + programModule, worker: newWorker, channelOffset, ptrWidth, @@ -1012,11 +1079,13 @@ async function handlePosixSpawn( newWorker.on("error", (err: Error) => finalizeUnexpectedWorkerError(childPid, newWorker, "spawn worker error", err)); newWorker.on("message", (raw: unknown) => { - const m = raw as { type: string; pid?: number; message?: string; status?: number }; + const m = raw as WorkerToHostMessage; if (m.type === "error" && m.pid === childPid) { finalizeProcessWorkerError(childPid, newWorker, m.message); } else if (m.type === "exit" && m.pid === childPid) { void finalizeProcessWorker(childPid, newWorker, m.status ?? 0); + } else if (m.type === "vm_interrupt_timer" && m.pid === childPid) { + handleVmInterruptTimer(m); } }); @@ -1126,6 +1195,8 @@ async function handleClone( void terminateThreadEntry(); } else if (m.type === "error") { failThread(m.message); + } else if (m.type === "vm_interrupt_timer") { + handleVmInterruptTimer(m); } }); threadWorker.on("error", (err: Error) => failThread(`worker error: ${err.message ?? err}`)); @@ -1186,6 +1257,7 @@ async function finishProcessExit(pid: number, exitStatus: number): Promise async function handleTerminate(msg: TerminateProcessMessage) { const pid = msg.pid; + clearVmInterruptTimer(pid); // Terminate thread workers const threads = threadWorkers.get(pid); @@ -1222,6 +1294,7 @@ async function handleTerminate(msg: TerminateProcessMessage) { async function handleDestroy(msg: { requestId: number }) { const processEntries = [...processes.entries()]; for (const [pid, info] of processEntries) { + clearVmInterruptTimer(pid); await terminateThreadWorkers(pid); await terminateTrackedWorker(info.worker); try { kernelWorker.unregisterProcess(pid); } catch {} @@ -1237,6 +1310,8 @@ async function handleDestroy(msg: { requestId: number }) { } } processes.clear(); + for (const timer of vmInterruptTimers.values()) clearTimeout(timer); + vmInterruptTimers.clear(); processTeardowns.clear(); reportedExits.clear(); threadModuleCache.clear(); diff --git a/host/src/platform/native-metadata.ts b/host/src/platform/native-metadata.ts index b90463a3a..bdc904a26 100644 --- a/host/src/platform/native-metadata.ts +++ b/host/src/platform/native-metadata.ts @@ -11,6 +11,8 @@ interface VirtualMetadata { mode?: number; uid?: number; gid?: number; + atimeMs?: number; + mtimeMs?: number; ctimeMs?: number; } @@ -24,6 +26,11 @@ interface VirtualMetadata { export class NativeMetadataOverlay { private readonly entries = new Map(); + constructor( + private readonly defaultUid = 0, + private readonly defaultGid = 0, + ) {} + toStatResult(s: Stats): StatResult { const metadata = this.entries.get(this.key(s)); return { @@ -33,12 +40,14 @@ export class NativeMetadataOverlay { ? s.mode : (s.mode & ~MODE_CHANGE_MASK) | (metadata.mode & MODE_CHANGE_MASK), nlink: s.nlink, - uid: metadata?.uid ?? 0, - gid: metadata?.gid ?? 0, + uid: metadata?.uid ?? this.defaultUid, + gid: metadata?.gid ?? this.defaultGid, size: s.size, - atimeMs: s.atimeMs, - mtimeMs: s.mtimeMs, - ctimeMs: metadata?.ctimeMs ?? s.ctimeMs, + atimeMs: metadata?.atimeMs ?? s.atimeMs, + mtimeMs: metadata?.mtimeMs ?? s.mtimeMs, + ctimeMs: metadata?.ctimeMs === undefined + ? s.ctimeMs + : Math.max(metadata.ctimeMs, s.ctimeMs), }; } @@ -55,6 +64,20 @@ export class NativeMetadataOverlay { metadata.ctimeMs = Date.now(); } + utimens(s: Stats, atimeMs: number, mtimeMs: number, ctimeMs = Date.now()): void { + const metadata = this.metadataFor(s); + metadata.atimeMs = atimeMs; + metadata.mtimeMs = mtimeMs; + metadata.ctimeMs = Math.max(metadata.ctimeMs ?? 0, ctimeMs); + } + + noteNativeContentChange(s: Stats): void { + const metadata = this.entries.get(this.key(s)); + if (metadata === undefined) return; + delete metadata.atimeMs; + delete metadata.mtimeMs; + } + forget(s: Stats): void { this.entries.delete(this.key(s)); } diff --git a/host/src/platform/node.ts b/host/src/platform/node.ts index 58a133272..a4ca99629 100644 --- a/host/src/platform/node.ts +++ b/host/src/platform/node.ts @@ -106,6 +106,7 @@ export class NodePlatformIO implements PlatformIO { ): number { const pos = offset ?? this.fdPositions.get(handle) ?? 0; const bytesWritten = fs.writeSync(handle, buffer, 0, length, pos); + if (bytesWritten > 0) this.metadata.noteNativeContentChange(fs.fstatSync(handle)); if (offset === null) { this.fdPositions.set(handle, pos + bytesWritten); } @@ -218,9 +219,24 @@ export class NodePlatformIO implements PlatformIO { } utimensat(path: string, atimeSec: number, atimeNsec: number, mtimeSec: number, mtimeNsec: number): void { - const atime = atimeSec + atimeNsec / 1e9; - const mtime = mtimeSec + mtimeNsec / 1e9; - fs.utimesSync(this.rewritePath(path), atime, mtime); + const nativePath = this.rewritePath(path); + if (atimeNsec === UTIME_OMIT && mtimeNsec === UTIME_OMIT) return; + + const stat = fs.statSync(nativePath); + const current = this.metadata.toStatResult(stat); + const nowMs = Date.now(); + const atimeMs = atimeNsec === UTIME_OMIT + ? current.atimeMs + : atimeNsec === UTIME_NOW + ? nowMs + : atimeSec * 1000 + Math.floor(atimeNsec / 1_000_000); + const mtimeMs = mtimeNsec === UTIME_OMIT + ? current.mtimeMs + : mtimeNsec === UTIME_NOW + ? nowMs + : mtimeSec * 1000 + Math.floor(mtimeNsec / 1_000_000); + fs.utimesSync(nativePath, atimeMs / 1000, mtimeMs / 1000); + this.metadata.utimens(stat, atimeMs, mtimeMs, fs.statSync(nativePath).ctimeMs); } opendir(path: string): number { @@ -258,6 +274,7 @@ export class NodePlatformIO implements PlatformIO { ftruncate(handle: number, length: number): void { fs.ftruncateSync(handle, length); + this.metadata.noteNativeContentChange(fs.fstatSync(handle)); } fsync(handle: number): void { @@ -282,8 +299,8 @@ export class NodePlatformIO implements PlatformIO { const elapsed = ns - this._startNs; return { sec: Number(elapsed / 1000000000n), nsec: Number(elapsed % 1000000000n) }; } - if (clockId === 1) { - // CLOCK_MONOTONIC + if (clockId === 1 || clockId === 7) { + // CLOCK_MONOTONIC / CLOCK_BOOTTIME return { sec: Number(ns / 1000000000n), nsec: Number(ns % 1000000000n) }; } // CLOCK_REALTIME — use hrtime + epoch offset for nanosecond resolution diff --git a/host/src/vfs/host-fs.ts b/host/src/vfs/host-fs.ts index 11d57d883..32427bbd2 100644 --- a/host/src/vfs/host-fs.ts +++ b/host/src/vfs/host-fs.ts @@ -12,6 +12,9 @@ import { NativeMetadataOverlay } from "../platform/native-metadata"; import type { FileSystemBackend, DirEntry } from "./types"; import { DEFAULT_STATFS_BLOCK_SIZE, DEFAULT_STATFS_NAMELEN } from "../statfs"; +const UTIME_NOW = 0x3fffffff; +const UTIME_OMIT = 0x3ffffffe; + /** * Translate Linux/POSIX open flags (as used by musl libc) to the * platform-native flag values that Node.js `fs.openSync` expects. @@ -83,31 +86,175 @@ export function nativeStatfs(path: string): StatfsResult { export class HostFileSystem implements FileSystemBackend { private rootPath: string; + private guestMountPoint: string; private fdPositions = new Map(); private dirHandles = new Map(); private nextDirHandle = 1; - private metadata = new NativeMetadataOverlay(); - - constructor(rootPath: string) { - this.rootPath = nodePath.resolve(rootPath); + private metadata: NativeMetadataOverlay; + private dirPathCache = new Map(); + private readonly maxDirPathCacheEntries = 4096; + + constructor( + rootPath: string, + guestMountPoint = "/", + options: { uid?: number; gid?: number } = {}, + ) { + const resolvedRoot = nodePath.resolve(rootPath); + this.rootPath = fs.existsSync(resolvedRoot) + ? fs.realpathSync(resolvedRoot) + : resolvedRoot; + this.guestMountPoint = this.normalizeGuestMountPoint(guestMountPoint); + this.metadata = new NativeMetadataOverlay(options.uid ?? 0, options.gid ?? 0); } /** - * Resolve a mount-relative path to an absolute host path, - * ensuring it stays within `rootPath`. + * Resolve a mount-relative guest path to an absolute host path, ensuring it + * stays within `rootPath`. + * + * This intentionally resolves components one at a time instead of using + * `path.resolve()`. POSIX pathname resolution must look up an intermediate + * component before a following `..` can step back out of it: + * `existing/missing/../file` fails with ENOENT because `missing` is looked + * up as a directory first. Lexical normalization would incorrectly collapse + * that to `existing/file`. + * + * Native symlink targets are stored as guest strings. When following a + * symlink whose target is absolute and still inside this mount, translate it + * back to a mount-relative path before continuing. This preserves readlink(2) + * output while allowing stat/open/chmod to follow absolute in-guest links. */ - private safePath(relative: string): string { - const resolved = nodePath.resolve( - this.rootPath, - relative.replace(/^\//, ""), - ); - if ( - resolved !== this.rootPath && - !resolved.startsWith(this.rootPath + nodePath.sep) - ) { + private safePath(relative: string, followFinal = true): string { + const hadTrailingSlash = relative.length > 1 && /\/+$/.test(relative); + const originalParts = this.pathParts(relative); + let current = this.rootPath; + let pending = [...originalParts]; + let processed: string[] = []; + let symlinkDepth = 0; + let cacheable = !originalParts.includes(".."); + + if (cacheable) { + for (let i = originalParts.length; i > 0; i--) { + const cached = this.dirPathCache.get(originalParts.slice(0, i).join("/")); + if (cached === undefined) continue; + current = cached; + pending = originalParts.slice(i); + processed = originalParts.slice(0, i); + break; + } + } + + while (pending.length > 0) { + const part = pending.shift()!; + if (part === ".") continue; + if (part === "..") { + cacheable = false; + if (current === this.rootPath) { + throw new Error("EACCES: path traversal blocked"); + } + current = nodePath.dirname(current); + processed.pop(); + continue; + } + + const candidate = nodePath.join(current, part); + const isFinal = pending.length === 0; + const shouldFollow = !isFinal || followFinal; + + let lst: fs.Stats | null = null; + try { + lst = fs.lstatSync(candidate); + } catch (err: any) { + if (isFinal && err?.code === "ENOENT") { + current = candidate; + break; + } + throw err; + } + + if (shouldFollow && lst.isSymbolicLink()) { + cacheable = false; + if (++symlinkDepth > 40) throw new Error("ELOOP: too many symbolic links"); + const target = fs.readlinkSync(candidate, "utf8"); + if (target.startsWith("/")) { + const mountRelative = this.guestAbsoluteToMountRelative(target); + if (mountRelative === null) { + throw new Error("EACCES: absolute symlink target escapes mount"); + } + current = this.rootPath; + pending = [...this.pathParts(mountRelative), ...pending]; + } else { + pending = [...this.pathParts(target), ...pending]; + } + continue; + } + + if (!isFinal && !lst.isDirectory()) { + throw new Error("ENOTDIR: not a directory"); + } + + if (!isFinal) { + current = fs.realpathSync(candidate); + this.assertWithinRoot(current); + processed.push(part); + if (cacheable) this.setCachedDirPath(processed, current); + } else { + current = candidate; + } + } + + if (hadTrailingSlash && current !== this.rootPath && !current.endsWith(nodePath.sep)) { + // Keep a final separator for native fs calls. POSIX requires a + // trailing slash to resolve the preceding component as a directory; the + // native call then returns ENOTDIR for regular files while still + // permitting operations such as mkdir("new-dir/"). + current += nodePath.sep; + } + this.assertWithinRoot(current); + return current; + } + + private setCachedDirPath(parts: string[], nativePath: string): void { + if (parts.length === 0) return; + this.dirPathCache.set(parts.join("/"), nativePath); + if (this.dirPathCache.size > this.maxDirPathCacheEntries) { + const oldest = this.dirPathCache.keys().next().value; + if (oldest !== undefined) this.dirPathCache.delete(oldest); + } + } + + private clearDirPathCache(): void { + this.dirPathCache.clear(); + } + + private normalizeGuestMountPoint(mountPoint: string): string { + if (!mountPoint.startsWith("/")) mountPoint = `/${mountPoint}`; + return mountPoint !== "/" && mountPoint.endsWith("/") + ? mountPoint.slice(0, -1) + : mountPoint; + } + + private pathParts(path: string): string[] { + return path + .replace(/^\/+/, "") + .split("/") + .filter((part) => part.length > 0 && part !== "."); + } + + private guestAbsoluteToMountRelative(path: string): string | null { + if (this.guestMountPoint === "/") return path; + if (path === this.guestMountPoint) return "/"; + if (path.startsWith(`${this.guestMountPoint}/`)) { + return path.slice(this.guestMountPoint.length) || "/"; + } + return null; + } + + private assertWithinRoot(path: string): void { + const rel = nodePath.relative(this.rootPath, path); + if (rel === "") return; + if (rel.startsWith("..") || nodePath.isAbsolute(rel)) { throw new Error("EACCES: path traversal blocked"); } - return resolved; } private toStatResult(s: fs.Stats): StatResult { @@ -124,7 +271,7 @@ export class HostFileSystem implements FileSystemBackend { // ── File handle operations ─────────────────────────────────── open(path: string, flags: number, mode: number): number { - const nativePath = this.safePath(path); + const nativePath = this.safePath(path, (flags & 0o400000) === 0); const created = (flags & 0o100) !== 0 && !fs.existsSync(nativePath); const fd = fs.openSync(nativePath, translateOpenFlags(flags), mode); if (created) this.metadata.chmod(fs.fstatSync(fd), mode); @@ -160,6 +307,7 @@ export class HostFileSystem implements FileSystemBackend { ): number { const pos = offset ?? this.fdPositions.get(handle) ?? 0; const bytesWritten = fs.writeSync(handle, buffer, 0, length, pos); + if (bytesWritten > 0) this.metadata.noteNativeContentChange(fs.fstatSync(handle)); if (offset === null) { this.fdPositions.set(handle, pos + bytesWritten); } @@ -191,6 +339,7 @@ export class HostFileSystem implements FileSystemBackend { ftruncate(handle: number, length: number): void { fs.ftruncateSync(handle, length); + this.metadata.noteNativeContentChange(fs.fstatSync(handle)); } fsync(handle: number): void { @@ -212,7 +361,7 @@ export class HostFileSystem implements FileSystemBackend { } lstat(path: string): StatResult { - return this.toStatResult(fs.lstatSync(this.safePath(path))); + return this.toStatResult(fs.lstatSync(this.safePath(path, false))); } statfs(path: string): StatfsResult { @@ -226,26 +375,29 @@ export class HostFileSystem implements FileSystemBackend { } rmdir(path: string): void { - const nativePath = this.safePath(path); + const nativePath = this.safePath(path, false); const stat = fs.lstatSync(nativePath); fs.rmdirSync(nativePath); + this.clearDirPathCache(); this.metadata.forget(stat); } unlink(path: string): void { - const nativePath = this.safePath(path); + const nativePath = this.safePath(path, false); const stat = fs.lstatSync(nativePath); fs.unlinkSync(nativePath); + if (stat.isSymbolicLink()) this.clearDirPathCache(); if (stat.nlink <= 1) this.metadata.forget(stat); } rename(oldPath: string, newPath: string): void { - const nativeNewPath = this.safePath(newPath); + const nativeNewPath = this.safePath(newPath, false); let replaced: fs.Stats | undefined; try { replaced = fs.lstatSync(nativeNewPath); } catch {} - fs.renameSync(this.safePath(oldPath), nativeNewPath); + fs.renameSync(this.safePath(oldPath, false), nativeNewPath); + this.clearDirPathCache(); if (replaced !== undefined && replaced.nlink <= 1) this.metadata.forget(replaced); } @@ -254,11 +406,11 @@ export class HostFileSystem implements FileSystemBackend { } symlink(target: string, path: string): void { - fs.symlinkSync(target, this.safePath(path)); + fs.symlinkSync(target, this.safePath(path, false)); } readlink(path: string): string { - return fs.readlinkSync(this.safePath(path), "utf8"); + return fs.readlinkSync(this.safePath(path, false), "utf8"); } chmod(path: string, mode: number): void { @@ -274,9 +426,24 @@ export class HostFileSystem implements FileSystemBackend { } utimensat(path: string, atimeSec: number, atimeNsec: number, mtimeSec: number, mtimeNsec: number): void { - const atime = atimeSec + atimeNsec / 1e9; - const mtime = mtimeSec + mtimeNsec / 1e9; - fs.utimesSync(this.safePath(path), atime, mtime); + const nativePath = this.safePath(path); + if (atimeNsec === UTIME_OMIT && mtimeNsec === UTIME_OMIT) return; + + const stat = fs.statSync(nativePath); + const current = this.metadata.toStatResult(stat); + const nowMs = Date.now(); + const atimeMs = atimeNsec === UTIME_OMIT + ? current.atimeMs + : atimeNsec === UTIME_NOW + ? nowMs + : atimeSec * 1000 + Math.floor(atimeNsec / 1_000_000); + const mtimeMs = mtimeNsec === UTIME_OMIT + ? current.mtimeMs + : mtimeNsec === UTIME_NOW + ? nowMs + : mtimeSec * 1000 + Math.floor(mtimeNsec / 1_000_000); + fs.utimesSync(nativePath, atimeMs / 1000, mtimeMs / 1000); + this.metadata.utimens(stat, atimeMs, mtimeMs, fs.statSync(nativePath).ctimeMs); } // ── Directory iteration ───────────────────────────────────── diff --git a/host/src/vfs/sharedfs-vendor.ts b/host/src/vfs/sharedfs-vendor.ts index db96aca7a..66070cd8b 100644 --- a/host/src/vfs/sharedfs-vendor.ts +++ b/host/src/vfs/sharedfs-vendor.ts @@ -47,6 +47,7 @@ export const O_RDONLY = 0x0000; export const O_WRONLY = 0x0001; export const O_RDWR = 0x0002; export const O_CREAT = 0x0040; +export const O_EXCL = 0x0080; export const O_TRUNC = 0x0200; export const O_APPEND = 0x0400; export const O_DIRECTORY = 0x010000; @@ -108,7 +109,9 @@ const INO_INDIRECT = 88; const INO_DOUBLE_INDIRECT = 92; const INO_UID = 96; // u32 const INO_GID = 100; // u32 -// 104-127 reserved for future fields (flags, xattrs, etc.) +const INO_GENERATION = 104; // uint64, incremented when an inode slot is allocated +const INO_OPEN_COUNT = 112; // u32, open fd references +// 116-127 reserved for future fields (flags, xattrs, etc.) // FD entry layout const FD_INO = 4; @@ -124,6 +127,7 @@ const READER_MASK = 0x7fffffff | 0; export interface StatResult { ino: number; + generation: number; mode: number; linkCount: number; size: number; @@ -143,6 +147,20 @@ export interface SharedFsStats { maxName: number; } +interface DirIndexEntry { + ino: number; + abs: number; + recLen: number; + nameLen: number; +} + +interface DirIndex { + generation: number; + size: number; + entries: Map; + free: Array<{ abs: number; recLen: number }>; +} + const ERROR_MESSAGES: Record = { [ENOENT]: "No such file or directory", [EIO]: "I/O error", @@ -172,6 +190,7 @@ export class SFSError extends Error { const encoder = new TextEncoder(); const decoder = new TextDecoder(); +const DOTDOT_BYTES = encoder.encode(".."); /** * Safely decode a Uint8Array that may be backed by SharedArrayBuffer. @@ -194,6 +213,18 @@ export class SharedFS { private view: DataView; private i32: Int32Array; private u8: Uint8Array; + private dirIndexes = new Map(); + private blockAllocHint = 0; + private inodeAllocHint = 2; + + /** + * Directory operations are stored in ext2-style variable-length entries. + * Linear scans are fine for normal directories, but workloads such as + * PHP's bug36365 test create tens of thousands of files in one directory. + * Build an in-memory name index once a directory reaches this size so + * creates/stat lookups stay near O(1) instead of O(n²). + */ + private static readonly DIR_INDEX_MIN_SIZE = 64 * 1024; private constructor(public readonly buffer: SharedArrayBuffer) { this.view = new DataView(buffer); @@ -261,16 +292,19 @@ export class SharedFS { const freeDataBlocks = totalBlocks - dataStart; Atomics.store(fs.i32, SB_FREE_BLOCKS >> 2, freeDataBlocks); + fs.blockAllocHint = dataStart; // Mark inodes 0 and 1 as used const ibStart = inodeBitmapStart * BLOCK_SIZE; fs.i32[ibStart >> 2] |= 0x3; Atomics.store(fs.i32, SB_FREE_INODES >> 2, totalInodes - 2); + fs.inodeAllocHint = 2; // Initialize root inode (inode 1) as empty directory const rootOff = fs.inodeOffset(ROOT_INO); fs.w32(rootOff + INO_MODE, S_IFDIR | 0o755); fs.w32(rootOff + INO_LINK_COUNT, 2); + fs.w64(rootOff + INO_GENERATION, 1); // Allocate a data block for root's directory entries const rootBlock = fs.blockAlloc(); @@ -310,6 +344,7 @@ export class SharedFS { throw new SFSError(EINVAL, "Bad version"); if (fs.r32(SB_BLOCK_SIZE) !== BLOCK_SIZE) throw new SFSError(EINVAL, "Bad block size"); + fs.resetAllocationHints(); return fs; } @@ -354,6 +389,34 @@ export class SharedFS { this.view.setBigUint64(off, BigInt(v), true); } + private resetAllocationHints(): void { + this.blockAllocHint = this.findNextFreeBlockHint(); + this.inodeAllocHint = this.findNextFreeInodeHint(); + } + + private findNextFreeBlockHint(): number { + const totalBlocks = this.r32(SB_TOTAL_BLOCKS); + const dataStart = this.r32(SB_DATA_START); + const bbStart = this.r32(SB_BLOCK_BITMAP_START) * BLOCK_SIZE; + for (let blockNo = dataStart; blockNo < totalBlocks; blockNo++) { + const idx = (bbStart >> 2) + (blockNo >> 5); + const bit = blockNo & 31; + if ((Atomics.load(this.i32, idx) & (1 << bit)) === 0) return blockNo; + } + return dataStart; + } + + private findNextFreeInodeHint(): number { + const totalInodes = this.r32(SB_TOTAL_INODES); + const ibStart = this.r32(SB_INODE_BITMAP_START) * BLOCK_SIZE; + for (let ino = 2; ino < totalInodes; ino++) { + const idx = (ibStart >> 2) + (ino >> 5); + const bit = ino & 31; + if ((Atomics.load(this.i32, idx) & (1 << bit)) === 0) return ino; + } + return 2; + } + // ── Superblock lock (for grow) ─────────────────────────────────── private sbLock(): void { @@ -376,31 +439,34 @@ export class SharedFS { private blockAlloc(): number { const totalBlocks = this.r32(SB_TOTAL_BLOCKS); const bbStart = this.r32(SB_BLOCK_BITMAP_START) * BLOCK_SIZE; - const numWords = Math.ceil(totalBlocks / 32); - - for (let w = 0; w < numWords; w++) { - const idx = (bbStart >> 2) + w; + const dataStart = this.r32(SB_DATA_START); + const start = + this.blockAllocHint >= dataStart && this.blockAllocHint < totalBlocks + ? this.blockAllocHint + : dataStart; + const allocatableBlocks = totalBlocks - dataStart; + + for (let checked = 0; checked < allocatableBlocks; checked++) { + const blockNo = + dataStart + ((start - dataStart + checked) % allocatableBlocks); + const idx = (bbStart >> 2) + (blockNo >> 5); + const bit = blockNo & 31; const word = Atomics.load(this.i32, idx); - if (word === -1) continue; // all bits set (0xFFFFFFFF as int32) - - for (let bit = 0; bit < 32; bit++) { - const blockNo = w * 32 + bit; - if (blockNo >= totalBlocks) return ENOSPC; - if (word & (1 << bit)) continue; - - const desired = word | (1 << bit); - const old = Atomics.compareExchange(this.i32, idx, word, desired); - if (old === word) { - Atomics.sub(this.i32, SB_FREE_BLOCKS >> 2, 1); - // Zero the newly allocated block - const off = blockNo * BLOCK_SIZE; - this.u8.fill(0, off, off + BLOCK_SIZE); - return blockNo; - } - // CAS failed — retry this word - w--; - break; + if (word & (1 << bit)) continue; + + const desired = word | (1 << bit); + const old = Atomics.compareExchange(this.i32, idx, word, desired); + if (old === word) { + Atomics.sub(this.i32, SB_FREE_BLOCKS >> 2, 1); + this.blockAllocHint = + blockNo + 1 < totalBlocks ? blockNo + 1 : dataStart; + // Zero the newly allocated block + const off = blockNo * BLOCK_SIZE; + this.u8.fill(0, off, off + BLOCK_SIZE); + return blockNo; } + // CAS failed — retry this candidate. + checked--; } return ENOSPC; } @@ -426,6 +492,9 @@ export class SharedFS { if (old === word) break; } Atomics.add(this.i32, SB_FREE_BLOCKS >> 2, 1); + if (blockNo >= this.r32(SB_DATA_START) && blockNo < this.blockAllocHint) { + this.blockAllocHint = blockNo; + } } // ── Growth ─────────────────────────────────────────────────────── @@ -462,6 +531,7 @@ export class SharedFS { this.w32(SB_TOTAL_BLOCKS, newTotal); Atomics.add(this.i32, SB_FREE_BLOCKS >> 2, growBy); Atomics.add(this.i32, SB_GENERATION >> 2, 1); + this.blockAllocHint = current; return 0; } finally { this.sbUnlock(); @@ -480,34 +550,40 @@ export class SharedFS { private inodeAlloc(): number { const totalInodes = this.r32(SB_TOTAL_INODES); const ibStart = this.r32(SB_INODE_BITMAP_START) * BLOCK_SIZE; - const numWords = Math.ceil(totalInodes / 32); - - for (let w = 0; w < numWords; w++) { - const idx = (ibStart >> 2) + w; + const start = + this.inodeAllocHint >= 2 && this.inodeAllocHint < totalInodes + ? this.inodeAllocHint + : 2; + const allocatableInodes = totalInodes - 2; + + for (let checked = 0; checked < allocatableInodes; checked++) { + const ino = 2 + ((start - 2 + checked) % allocatableInodes); + const idx = (ibStart >> 2) + (ino >> 5); + const bit = ino & 31; const word = Atomics.load(this.i32, idx); - if (word === -1) continue; - - for (let bit = 0; bit < 32; bit++) { - const ino = w * 32 + bit; - if (ino >= totalInodes) return ENOSPC; - if (word & (1 << bit)) continue; - - const desired = word | (1 << bit); - const old = Atomics.compareExchange(this.i32, idx, word, desired); - if (old === word) { - Atomics.sub(this.i32, SB_FREE_INODES >> 2, 1); - // Zero the inode - const off = this.inodeOffset(ino); - this.u8.fill(0, off, off + INODE_SIZE); - return ino; - } - w--; - break; + if (word & (1 << bit)) continue; + + const desired = word | (1 << bit); + const old = Atomics.compareExchange(this.i32, idx, word, desired); + if (old === word) { + Atomics.sub(this.i32, SB_FREE_INODES >> 2, 1); + this.inodeAllocHint = ino + 1 < totalInodes ? ino + 1 : 2; + // Zero the inode + const off = this.inodeOffset(ino); + this.u8.fill(0, off, off + INODE_SIZE); + this.w64(off + INO_GENERATION, this.nextInodeGeneration()); + return ino; } + // CAS failed — retry this candidate. + checked--; } return ENOSPC; } + private nextInodeGeneration(): number { + return Atomics.add(this.i32, SB_GENERATION >> 2, 1) + 1; + } + private inodeFree(ino: number): void { const ibStart = this.r32(SB_INODE_BITMAP_START) * BLOCK_SIZE; const idx = (ibStart >> 2) + (ino >> 5); @@ -520,6 +596,61 @@ export class SharedFS { if (old === word) break; } Atomics.add(this.i32, SB_FREE_INODES >> 2, 1); + if (ino >= 2 && ino < this.inodeAllocHint) this.inodeAllocHint = ino; + } + + private inodeAddOpenRef(ino: number): void { + const idx = (this.inodeOffset(ino) + INO_OPEN_COUNT) >> 2; + Atomics.add(this.i32, idx, 1); + } + + private inodeDropOpenRef(ino: number): void { + let shouldFree = false; + this.inodeWriteLock(ino); + try { + const off = this.inodeOffset(ino); + const openCount = this.r32(off + INO_OPEN_COUNT); + if (openCount > 0) { + this.w32(off + INO_OPEN_COUNT, openCount - 1); + } + if (openCount <= 1 && this.r32(off + INO_LINK_COUNT) === 0) { + this.inodeTruncate(ino, 0); + shouldFree = true; + } + } finally { + this.inodeWriteUnlock(ino); + } + if (shouldFree) this.inodeFree(ino); + } + + private inodeDropLinkRefLocked(ino: number): boolean { + const off = this.inodeOffset(ino); + const linkCount = this.r32(off + INO_LINK_COUNT); + if (linkCount > 1) { + this.w32(off + INO_LINK_COUNT, linkCount - 1); + this.w64(off + INO_CTIME, Date.now()); + return false; + } + return this.inodeOrphanLocked(ino); + } + + private inodeOrphanLocked(ino: number): boolean { + const off = this.inodeOffset(ino); + this.w32(off + INO_LINK_COUNT, 0); + this.w64(off + INO_CTIME, Date.now()); + if (this.r32(off + INO_OPEN_COUNT) > 0) return false; + const mode = this.r32(off + INO_MODE); + const size = this.r64(off + INO_SIZE); + if ((mode & S_IFMT) === S_IFLNK && size <= INLINE_SYMLINK_SIZE) { + // Short symlink targets are stored inline in the inode's direct-pointer + // area. POSIX unlink removes the symlink inode itself even if the target + // is dangling; do not interpret inline target bytes as block numbers. + this.u8.fill(0, off + INO_DIRECT, off + INO_DIRECT + INLINE_SYMLINK_SIZE); + this.w64(off + INO_SIZE, 0); + } else { + this.inodeTruncate(ino, 0); + } + return true; } // ── Inode locking ──────────────────────────────────────────────── @@ -695,6 +826,11 @@ export class SharedFS { count: number, ): number { const inoOff = this.inodeOffset(ino); + const size = this.r64(inoOff + INO_SIZE); + if (offset > size) { + this.zeroInodeRange(ino, size, offset); + } + let totalWritten = 0; let srcPos = 0; @@ -716,13 +852,31 @@ export class SharedFS { totalWritten += chunk; } - const size = this.r64(inoOff + INO_SIZE); - if (offset > size) { + if (offset > this.r64(inoOff + INO_SIZE)) { this.w64(inoOff + INO_SIZE, offset); } + if (totalWritten > 0) { + const now = Date.now(); + this.w64(inoOff + INO_MTIME, now); + this.w64(inoOff + INO_CTIME, now); + } return totalWritten; } + private zeroInodeRange(ino: number, start: number, end: number): void { + while (start < end) { + const fileBlock = Math.floor(start / BLOCK_SIZE); + const blockOff = start % BLOCK_SIZE; + const chunk = Math.min(BLOCK_SIZE - blockOff, end - start); + const phys = this.inodeBlockMap(ino, fileBlock, false); + if (phys > 0) { + const abs = phys * BLOCK_SIZE + blockOff; + this.u8.fill(0, abs, abs + chunk); + } + start += chunk; + } + } + private freeBlocksFrom(ino: number, fromBlock: number): void { const inoOff = this.inodeOffset(ino); @@ -792,18 +946,236 @@ export class SharedFS { private inodeTruncate(ino: number, newSize: number): void { const inoOff = this.inodeOffset(ino); const curSize = this.r64(inoOff + INO_SIZE); + const sizeChanged = newSize !== curSize; if (newSize >= curSize) { + if (newSize > curSize) { + this.zeroInodeRange(ino, curSize, newSize); + } this.w64(inoOff + INO_SIZE, newSize); + if (sizeChanged) { + const now = Date.now(); + this.w64(inoOff + INO_MTIME, now); + this.w64(inoOff + INO_CTIME, now); + } return; } + if (newSize % BLOCK_SIZE !== 0) { + this.zeroInodeRange( + ino, + newSize, + Math.ceil(newSize / BLOCK_SIZE) * BLOCK_SIZE, + ); + } const keepBlocks = Math.ceil(newSize / BLOCK_SIZE); this.freeBlocksFrom(ino, keepBlocks); this.w64(inoOff + INO_SIZE, newSize); + if (sizeChanged) { + const now = Date.now(); + this.w64(inoOff + INO_MTIME, now); + this.w64(inoOff + INO_CTIME, now); + } } // ── Directory operations ───────────────────────────────────────── + private touchDirectoryMutation(dirIno: number): void { + const inoOff = this.inodeOffset(dirIno); + const now = Date.now(); + this.w64(inoOff + INO_MTIME, now); + this.w64(inoOff + INO_CTIME, now); + } + + private dirNameKey(name: Uint8Array): string { + return safeDecode(name); + } + + private dirEntryNameMatches(abs: number, name: Uint8Array): boolean { + const entNameLen = this.view.getUint16(abs + 6, true); + if (entNameLen !== name.length) return false; + for (let i = 0; i < name.length; i++) { + if (this.u8[abs + DIRENT_HEADER_SIZE + i] !== name[i]) return false; + } + return true; + } + + private rebuildDirIndex( + dirIno: number, + generation: number, + dirSize: number, + ): DirIndex | number { + const entries = new Map(); + const free: Array<{ abs: number; recLen: number }> = []; + let pos = 0; + + while (pos < dirSize) { + const fileBlock = Math.floor(pos / BLOCK_SIZE); + const blockOff = pos % BLOCK_SIZE; + const phys = this.inodeBlockMap(dirIno, fileBlock, false); + if (phys <= 0) return EIO; + + const blockBase = phys * BLOCK_SIZE; + let remain = dirSize - pos; + if (remain > BLOCK_SIZE - blockOff) remain = BLOCK_SIZE - blockOff; + + let off = blockOff; + while (off < blockOff + remain) { + const abs = blockBase + off; + const entIno = this.r32(abs); + const recLen = this.view.getUint16(abs + 4, true); + const entNameLen = this.view.getUint16(abs + 6, true); + + if (recLen === 0) return EIO; + + if (entIno !== 0) { + const name = safeDecode( + this.u8.subarray( + abs + DIRENT_HEADER_SIZE, + abs + DIRENT_HEADER_SIZE + entNameLen, + ), + ); + entries.set(name, { + ino: entIno, + abs, + recLen, + nameLen: entNameLen, + }); + } else if (recLen >= DIRENT_HEADER_SIZE) { + free.push({ abs, recLen }); + } + + off += recLen; + } + pos += remain; + } + + const index = { generation, size: dirSize, entries, free }; + this.dirIndexes.set(dirIno, index); + return index; + } + + private getDirIndex(dirIno: number): DirIndex | null | number { + const inoOff = this.inodeOffset(dirIno); + const dirSize = this.r64(inoOff + INO_SIZE); + const generation = this.r64(inoOff + INO_GENERATION); + const cached = this.dirIndexes.get(dirIno); + if ( + cached && + cached.generation === generation && + cached.size === dirSize + ) { + return cached; + } + if (cached) this.dirIndexes.delete(dirIno); + + if (dirSize < SharedFS.DIR_INDEX_MIN_SIZE) return null; + return this.rebuildDirIndex(dirIno, generation, dirSize); + } + + private updateDirIndexAdd( + dirIno: number, + name: Uint8Array, + childIno: number, + abs: number, + recLen: number, + ): void { + const inoOff = this.inodeOffset(dirIno); + const dirSize = this.r64(inoOff + INO_SIZE); + const generation = this.r64(inoOff + INO_GENERATION); + const index = this.dirIndexes.get(dirIno); + if (!index) return; + if (index.generation !== generation) { + this.dirIndexes.delete(dirIno); + return; + } + index.size = dirSize; + index.entries.set(this.dirNameKey(name), { + ino: childIno, + abs, + recLen, + nameLen: name.length, + }); + } + + private useDirIndexFreeSlot( + index: DirIndex, + dirIno: number, + name: Uint8Array, + childIno: number, + ): boolean { + const needed = align4(DIRENT_HEADER_SIZE + name.length); + + for (let i = index.free.length - 1; i >= 0; i--) { + const slot = index.free[i]; + if (slot.recLen < needed) continue; + index.free.splice(i, 1); + if ( + this.r32(slot.abs) !== 0 || + this.view.getUint16(slot.abs + 4, true) !== slot.recLen + ) { + continue; + } + + this.w32(slot.abs, childIno); + this.view.setUint16(slot.abs + 6, name.length, true); + this.u8.set(name, slot.abs + DIRENT_HEADER_SIZE); + this.touchDirectoryMutation(dirIno); + this.updateDirIndexAdd(dirIno, name, childIno, slot.abs, slot.recLen); + return true; + } + + return false; + } + + private updateDirIndexRemove(dirIno: number, name: Uint8Array): void { + const inoOff = this.inodeOffset(dirIno); + const dirSize = this.r64(inoOff + INO_SIZE); + const generation = this.r64(inoOff + INO_GENERATION); + const index = this.dirIndexes.get(dirIno); + if (!index) return; + if (index.generation !== generation || index.size !== dirSize) { + this.dirIndexes.delete(dirIno); + return; + } + index.entries.delete(this.dirNameKey(name)); + } + + private updateDirIndexRecLen( + dirIno: number, + abs: number, + recLen: number, + ): void { + const index = this.dirIndexes.get(dirIno); + if (!index) return; + for (const entry of index.entries.values()) { + if (entry.abs === abs) { + entry.recLen = recLen; + return; + } + } + } + private dirLookup(dirIno: number, name: Uint8Array): number { + const index = this.getDirIndex(dirIno); + if (typeof index === "number") return index; + if (index) { + const entry = index.entries.get(this.dirNameKey(name)); + if (!entry) return ENOENT; + + // Validate positive hits against the backing directory entry so stale + // in-process indexes cannot resurrect an externally removed name. + if ( + this.r32(entry.abs) === entry.ino && + this.view.getUint16(entry.abs + 4, true) === entry.recLen && + this.view.getUint16(entry.abs + 6, true) === entry.nameLen && + this.dirEntryNameMatches(entry.abs, name) + ) { + return entry.ino; + } + + index.entries.delete(this.dirNameKey(name)); + return ENOENT; + } + const inoOff = this.inodeOffset(dirIno); const dirSize = this.r64(inoOff + INO_SIZE); let pos = 0; @@ -844,11 +1216,113 @@ export class SharedFS { return ENOENT; } + private findLastDirEntryInBlock( + dirIno: number, + fileBlock: number, + endOff: number, + ): number { + const phys = this.inodeBlockMap(dirIno, fileBlock, false); + if (phys <= 0) return -1; + const blockBase = phys * BLOCK_SIZE; + let off = 0; + let lastAbs = -1; + while (off < endOff) { + const abs = blockBase + off; + const recLen = this.view.getUint16(abs + 4, true); + if (recLen === 0 || off + recLen > endOff) return -1; + lastAbs = abs; + off += recLen; + } + return off === endOff ? lastAbs : -1; + } + + private dirAppendEntry( + dirIno: number, + name: Uint8Array, + childIno: number, + lastEntAbs = -1, + ): number { + const inoOff = this.inodeOffset(dirIno); + const dirSize = this.r64(inoOff + INO_SIZE); + const needed = align4(DIRENT_HEADER_SIZE + name.length); + + // No space found — append a new entry at the end. + // Directory entries must not cross block boundaries (like ext2). + let appendPos = dirSize; + let fileBlock = Math.floor(appendPos / BLOCK_SIZE); + let blockOff = appendPos % BLOCK_SIZE; + + if (blockOff !== 0 && blockOff + needed > BLOCK_SIZE) { + // Entry doesn't fit in remaining space — skip to next block. + const gap = BLOCK_SIZE - blockOff; + if (gap >= DIRENT_HEADER_SIZE) { + // Write a padding entry (ino=0) to fill the gap + const padPhys = this.inodeBlockMap(dirIno, fileBlock, false); + if (padPhys > 0) { + const padAbs = padPhys * BLOCK_SIZE + blockOff; + this.w32(padAbs, 0); + this.view.setUint16(padAbs + 4, gap, true); + this.view.setUint16(padAbs + 6, 0, true); + } + } else { + if (lastEntAbs < 0) { + lastEntAbs = this.findLastDirEntryInBlock( + dirIno, + fileBlock, + blockOff, + ); + } + if (lastEntAbs >= 0) { + // Gap too small for a padding entry — extend last entry's recLen + const oldRecLen = this.view.getUint16(lastEntAbs + 4, true); + const newRecLen = oldRecLen + gap; + this.view.setUint16(lastEntAbs + 4, newRecLen, true); + this.updateDirIndexRecLen(dirIno, lastEntAbs, newRecLen); + } + } + appendPos = (fileBlock + 1) * BLOCK_SIZE; + fileBlock++; + blockOff = 0; + } + + // Need a new block? + let phys: number; + if (blockOff === 0) { + phys = this.inodeBlockMap(dirIno, fileBlock, true); + if (phys < 0) return phys; + } else { + phys = this.inodeBlockMap(dirIno, fileBlock, false); + if (phys <= 0) return EIO; + } + + const abs = phys * BLOCK_SIZE + blockOff; + this.w32(abs, childIno); + this.view.setUint16(abs + 4, needed, true); + this.view.setUint16(abs + 6, name.length, true); + this.u8.set(name, abs + DIRENT_HEADER_SIZE); + + this.w64(inoOff + INO_SIZE, appendPos + needed); + this.touchDirectoryMutation(dirIno); + this.updateDirIndexAdd(dirIno, name, childIno, abs, needed); + return 0; + } + private dirAddEntry( dirIno: number, name: Uint8Array, childIno: number, ): number { + const index = this.getDirIndex(dirIno); + if (typeof index === "number") return index; + if (index) { + if (this.useDirIndexFreeSlot(index, dirIno, name, childIno)) return 0; + + // Large indexed directories favor append-only growth when no indexed + // deleted slot is available. A full scan to discover slack on every + // create would reintroduce the O(n²) behavior the index avoids. + return this.dirAppendEntry(dirIno, name, childIno); + } + const inoOff = this.inodeOffset(dirIno); const dirSize = this.r64(inoOff + INO_SIZE); const needed = align4(DIRENT_HEADER_SIZE + name.length); @@ -882,6 +1356,8 @@ export class SharedFS { this.w32(abs, childIno); this.view.setUint16(abs + 6, name.length, true); this.u8.set(name, abs + DIRENT_HEADER_SIZE); + this.touchDirectoryMutation(dirIno); + this.updateDirIndexAdd(dirIno, name, childIno, abs, recLen); return 0; } @@ -896,6 +1372,8 @@ export class SharedFS { this.view.setUint16(newAbs + 4, slack, true); this.view.setUint16(newAbs + 6, name.length, true); this.u8.set(name, newAbs + DIRENT_HEADER_SIZE); + this.touchDirectoryMutation(dirIno); + this.updateDirIndexAdd(dirIno, name, childIno, newAbs, slack); return 0; } @@ -905,55 +1383,106 @@ export class SharedFS { pos += remain; } - // No space found — append a new entry at the end. - // Directory entries must not cross block boundaries (like ext2). - let appendPos = dirSize; - let fileBlock = Math.floor(appendPos / BLOCK_SIZE); - let blockOff = appendPos % BLOCK_SIZE; + return this.dirAppendEntry(dirIno, name, childIno, lastEntAbs); + } - if (blockOff !== 0 && blockOff + needed > BLOCK_SIZE) { - // Entry doesn't fit in remaining space — skip to next block. - const gap = BLOCK_SIZE - blockOff; - if (gap >= DIRENT_HEADER_SIZE) { - // Write a padding entry (ino=0) to fill the gap - const padPhys = this.inodeBlockMap(dirIno, fileBlock, false); - if (padPhys > 0) { - const padAbs = padPhys * BLOCK_SIZE + blockOff; - this.w32(padAbs, 0); - this.view.setUint16(padAbs + 4, gap, true); - this.view.setUint16(padAbs + 6, 0, true); - } - } else if (lastEntAbs >= 0) { - // Gap too small for a padding entry — extend last entry's recLen - const oldRecLen = this.view.getUint16(lastEntAbs + 4, true); - this.view.setUint16(lastEntAbs + 4, oldRecLen + gap, true); + private dirRemoveEntry(dirIno: number, name: Uint8Array): number { + const index = this.getDirIndex(dirIno); + if (typeof index === "number") return index; + if (index) { + const key = this.dirNameKey(name); + const entry = index.entries.get(key); + if (!entry) return ENOENT; + + if ( + this.r32(entry.abs) === entry.ino && + this.view.getUint16(entry.abs + 4, true) === entry.recLen && + this.view.getUint16(entry.abs + 6, true) === entry.nameLen && + this.dirEntryNameMatches(entry.abs, name) + ) { + this.w32(entry.abs, 0); // mark as deleted + index.entries.delete(key); + index.free.push({ abs: entry.abs, recLen: entry.recLen }); + this.touchDirectoryMutation(dirIno); + return 0; } - appendPos = (fileBlock + 1) * BLOCK_SIZE; - fileBlock++; - blockOff = 0; + + index.entries.delete(key); + // Fall through to the linear scan below if the cached slot was stale. } - // Need a new block? - let phys: number; - if (blockOff === 0) { - phys = this.inodeBlockMap(dirIno, fileBlock, true); - if (phys < 0) return phys; - } else { - phys = this.inodeBlockMap(dirIno, fileBlock, false); + const inoOff = this.inodeOffset(dirIno); + const dirSize = this.r64(inoOff + INO_SIZE); + let pos = 0; + + while (pos < dirSize) { + const fileBlock = Math.floor(pos / BLOCK_SIZE); + const blockOff = pos % BLOCK_SIZE; + const phys = this.inodeBlockMap(dirIno, fileBlock, false); if (phys <= 0) return EIO; - } - const abs = phys * BLOCK_SIZE + blockOff; - this.w32(abs, childIno); - this.view.setUint16(abs + 4, needed, true); - this.view.setUint16(abs + 6, name.length, true); - this.u8.set(name, abs + DIRENT_HEADER_SIZE); + const blockBase = phys * BLOCK_SIZE; + let remain = dirSize - pos; + if (remain > BLOCK_SIZE - blockOff) remain = BLOCK_SIZE - blockOff; - this.w64(inoOff + INO_SIZE, appendPos + needed); - return 0; + let off = blockOff; + while (off < blockOff + remain) { + const abs = blockBase + off; + const entIno = this.r32(abs); + const recLen = this.view.getUint16(abs + 4, true); + const entNameLen = this.view.getUint16(abs + 6, true); + + if (recLen === 0) return EIO; + + if (entIno !== 0 && entNameLen === name.length) { + let match = true; + for (let i = 0; i < name.length; i++) { + if (this.u8[abs + DIRENT_HEADER_SIZE + i] !== name[i]) { + match = false; + break; + } + } + if (match) { + this.w32(abs, 0); // mark as deleted + this.touchDirectoryMutation(dirIno); + this.updateDirIndexRemove(dirIno, name); + return 0; + } + } + off += recLen; + } + pos += remain; + } + return ENOENT; } - private dirRemoveEntry(dirIno: number, name: Uint8Array): number { + private dirReplaceEntryIno( + dirIno: number, + name: Uint8Array, + childIno: number, + ): number { + const index = this.getDirIndex(dirIno); + if (typeof index === "number") return index; + if (index) { + const key = this.dirNameKey(name); + const entry = index.entries.get(key); + + if ( + entry && + this.r32(entry.abs) === entry.ino && + this.view.getUint16(entry.abs + 4, true) === entry.recLen && + this.view.getUint16(entry.abs + 6, true) === entry.nameLen && + this.dirEntryNameMatches(entry.abs, name) + ) { + this.w32(entry.abs, childIno); + entry.ino = childIno; + return 0; + } + + if (entry) index.entries.delete(key); + // Fall through to the linear scan below if the cached slot was stale. + } + const inoOff = this.inodeOffset(dirIno); const dirSize = this.r64(inoOff + INO_SIZE); let pos = 0; @@ -986,7 +1515,8 @@ export class SharedFS { } } if (match) { - this.w32(abs, 0); // mark as deleted + this.w32(abs, childIno); + this.updateDirIndexAdd(dirIno, name, childIno, abs, recLen); return 0; } } @@ -1045,6 +1575,21 @@ export class SharedFS { return true; } + private dirIsAncestor(ancestorIno: number, dirIno: number): boolean { + let cur = dirIno; + + for (let depth = 0; depth < MAX_SYMLINK_HOPS * 1024; depth++) { + if (cur === ancestorIno) return true; + if (cur === ROOT_INO) return false; + + const parent = this.dirLookup(cur, DOTDOT_BYTES); + if (parent < 0 || parent === cur) return false; + cur = parent; + } + + return true; + } + // ── Path resolution ────────────────────────────────────────────── private pathResolve(path: string, followSymlinks: boolean): number { @@ -1159,6 +1704,7 @@ export class SharedFS { this.w64(base + FD_OFFSET, 0); this.w32(base + FD_FLAGS, flags); this.w32(base + FD_IS_DIR, isDir ? 1 : 0); + this.inodeAddOpenRef(ino); return i; } } @@ -1200,6 +1746,7 @@ export class SharedFS { const off = this.inodeOffset(ino); return { ino, + generation: this.r64(off + INO_GENERATION), mode: this.r32(off + INO_MODE), linkCount: this.r32(off + INO_LINK_COUNT), size: this.r64(off + INO_SIZE), @@ -1216,6 +1763,13 @@ export class SharedFS { open(path: string, flags: number, createMode: number = 0o644): number { const accMode = flags & O_ACCMODE; const creating = (flags & O_CREAT) !== 0; + const exclusive = (flags & O_EXCL) !== 0; + + if (creating && exclusive) { + const existing = this.pathResolve(path, false); + if (existing >= 0) throw new SFSError(EEXIST); + if (existing !== ENOENT) throw new SFSError(existing); + } let ino = this.pathResolve(path, true); @@ -1228,6 +1782,7 @@ export class SharedFS { const nameBytes = encoder.encode(name); const existing = this.dirLookup(parentIno, nameBytes); if (existing >= 0) { + if (exclusive) throw new SFSError(EEXIST); ino = existing; } else { const newIno = this.inodeAlloc(); @@ -1279,12 +1834,6 @@ export class SharedFS { const fd = this.fdAlloc(ino, flags, false); if (fd < 0) throw new SFSError(fd); - // If append, set offset to end - if (flags & O_APPEND) { - const base = FD_TABLE_OFFSET + fd * FD_ENTRY_SIZE; - this.w64(base + FD_OFFSET, this.r64(inoOff + INO_SIZE)); - } - return fd; } @@ -1292,11 +1841,15 @@ export class SharedFS { const entry = this.fdGet(fd); if (!entry) throw new SFSError(EBADF); this.fdFree(fd); + this.inodeDropOpenRef(entry.ino); } read(fd: number, buffer: Uint8Array): number { const entry = this.fdGet(fd); if (!entry) throw new SFSError(EBADF); + const inoOff = this.inodeOffset(entry.ino); + const mode = this.r32(inoOff + INO_MODE); + if ((mode & S_IFMT) === S_IFDIR) throw new SFSError(EISDIR); this.inodeReadLock(entry.ino); try { @@ -1420,6 +1973,7 @@ export class SharedFS { unlink(path: string): void { const { parentIno, name } = this.pathResolveParent(path); const nameBytes = encoder.encode(name); + const requiresDirectory = path.length > 1 && path.endsWith("/"); this.inodeWriteLock(parentIno); try { @@ -1428,22 +1982,22 @@ export class SharedFS { const childOff = this.inodeOffset(childIno); const mode = this.r32(childOff + INO_MODE); + if (requiresDirectory && (mode & S_IFMT) !== S_IFDIR) { + throw new SFSError(ENOTDIR); + } if ((mode & S_IFMT) === S_IFDIR) throw new SFSError(EISDIR); const rc = this.dirRemoveEntry(parentIno, nameBytes); if (rc < 0) throw new SFSError(rc); + let shouldFree = false; this.inodeWriteLock(childIno); - const linkCount = this.r32(childOff + INO_LINK_COUNT); - if (linkCount <= 1) { - this.inodeTruncate(childIno, 0); - this.w32(childOff + INO_LINK_COUNT, 0); - this.inodeWriteUnlock(childIno); - this.inodeFree(childIno); - } else { - this.w32(childOff + INO_LINK_COUNT, linkCount - 1); + try { + shouldFree = this.inodeDropLinkRefLocked(childIno); + } finally { this.inodeWriteUnlock(childIno); } + if (shouldFree) this.inodeFree(childIno); } finally { this.inodeWriteUnlock(parentIno); } @@ -1456,6 +2010,8 @@ export class SharedFS { this.pathResolveParent(newPath); const oldNameBytes = encoder.encode(oldName); const newNameBytes = encoder.encode(newName); + const oldRequiresDirectory = oldPath.length > 1 && oldPath.endsWith("/"); + const newRequiresDirectory = newPath.length > 1 && newPath.endsWith("/"); // Lock both parents (consistent order to avoid deadlock) const first = Math.min(oldParent, newParent); @@ -1466,19 +2022,63 @@ export class SharedFS { try { const srcIno = this.dirLookup(oldParent, oldNameBytes); if (srcIno < 0) throw new SFSError(srcIno); + const srcOff = this.inodeOffset(srcIno); + const srcMode = this.r32(srcOff + INO_MODE); + const srcType = srcMode & S_IFMT; + + if ( + (oldRequiresDirectory || newRequiresDirectory) && + srcType !== S_IFDIR + ) { + throw new SFSError(ENOTDIR); + } + + if (srcType === S_IFDIR && this.dirIsAncestor(srcIno, newParent)) { + throw new SFSError(EINVAL); + } // Remove any existing entry at destination const existingIno = this.dirLookup(newParent, newNameBytes); + let removedExistingDirectory = false; if (existingIno >= 0) { + if (existingIno === srcIno) { + return; + } const existOff = this.inodeOffset(existingIno); const existMode = this.r32(existOff + INO_MODE); - if ((existMode & S_IFMT) === S_IFDIR) throw new SFSError(EISDIR); - this.dirRemoveEntry(newParent, newNameBytes); - this.inodeWriteLock(existingIno); - this.inodeTruncate(existingIno, 0); - this.w32(existOff + INO_LINK_COUNT, 0); - this.inodeWriteUnlock(existingIno); - this.inodeFree(existingIno); + const existType = existMode & S_IFMT; + + if (srcType === S_IFDIR) { + if (existType !== S_IFDIR) throw new SFSError(ENOTDIR); + + let shouldFreeExisting = false; + this.inodeWriteLock(existingIno); + try { + if (!this.dirIsEmpty(existingIno)) throw new SFSError(ENOTEMPTY); + const rc = this.dirRemoveEntry(newParent, newNameBytes); + if (rc < 0) throw new SFSError(rc); + shouldFreeExisting = this.inodeOrphanLocked(existingIno); + } finally { + this.inodeWriteUnlock(existingIno); + } + if (shouldFreeExisting) this.inodeFree(existingIno); + removedExistingDirectory = true; + } else { + if (existType === S_IFDIR) { + throw new SFSError(newRequiresDirectory ? ENOTDIR : EISDIR); + } + + const rc = this.dirRemoveEntry(newParent, newNameBytes); + if (rc < 0) throw new SFSError(rc); + let shouldFreeExisting = false; + this.inodeWriteLock(existingIno); + try { + shouldFreeExisting = this.inodeDropLinkRefLocked(existingIno); + } finally { + this.inodeWriteUnlock(existingIno); + } + if (shouldFreeExisting) this.inodeFree(existingIno); + } } // Add entry in new directory @@ -1489,21 +2089,45 @@ export class SharedFS { this.dirRemoveEntry(oldParent, oldNameBytes); // Update link counts for directory renames - const srcOff = this.inodeOffset(srcIno); - const srcMode = this.r32(srcOff + INO_MODE); - if ( - (srcMode & S_IFMT) === S_IFDIR && - oldParent !== newParent - ) { - const oldPOff = this.inodeOffset(oldParent); - this.w32( - oldPOff + INO_LINK_COUNT, - this.r32(oldPOff + INO_LINK_COUNT) - 1, - ); + if (srcType === S_IFDIR) { + if (oldParent !== newParent) { + const oldPOff = this.inodeOffset(oldParent); + this.w32( + oldPOff + INO_LINK_COUNT, + this.r32(oldPOff + INO_LINK_COUNT) - 1, + ); + const newPOff = this.inodeOffset(newParent); + this.w32( + newPOff + INO_LINK_COUNT, + this.r32(newPOff + INO_LINK_COUNT) + 1, + ); + + this.inodeWriteLock(srcIno); + try { + const dotdotRc = this.dirReplaceEntryIno( + srcIno, + DOTDOT_BYTES, + newParent, + ); + if (dotdotRc < 0) throw new SFSError(dotdotRc); + this.w64(srcOff + INO_CTIME, Date.now()); + } finally { + this.inodeWriteUnlock(srcIno); + } + } + + if (removedExistingDirectory) { + const newPOff = this.inodeOffset(newParent); + this.w32( + newPOff + INO_LINK_COUNT, + this.r32(newPOff + INO_LINK_COUNT) - 1, + ); + } + } else if (removedExistingDirectory) { const newPOff = this.inodeOffset(newParent); this.w32( newPOff + INO_LINK_COUNT, - this.r32(newPOff + INO_LINK_COUNT) + 1, + this.r32(newPOff + INO_LINK_COUNT) - 1, ); } } finally { @@ -1590,17 +2214,17 @@ export class SharedFS { const mode = this.r32(childOff + INO_MODE); if ((mode & S_IFMT) !== S_IFDIR) throw new SFSError(ENOTDIR); + let shouldFree = false; this.inodeWriteLock(childIno); try { if (!this.dirIsEmpty(childIno)) throw new SFSError(ENOTEMPTY); this.dirRemoveEntry(parentIno, nameBytes); - this.inodeTruncate(childIno, 0); - this.w32(childOff + INO_LINK_COUNT, 0); + shouldFree = this.inodeOrphanLocked(childIno); } finally { this.inodeWriteUnlock(childIno); } - this.inodeFree(childIno); + if (shouldFree) this.inodeFree(childIno); // Decrement parent link count const pOff = this.inodeOffset(parentIno); @@ -1662,7 +2286,8 @@ export class SharedFS { try { const off = this.inodeOffset(ino); const oldMode = this.r32(off + INO_MODE); - this.w32(off + INO_MODE, (oldMode & S_IFMT) | (mode & 0o7777)); + const fileType = (mode & S_IFMT) || (oldMode & S_IFMT); + this.w32(off + INO_MODE, fileType | (mode & 0o7777)); this.w64(off + INO_CTIME, Date.now()); } finally { this.inodeWriteUnlock(ino); @@ -1676,7 +2301,8 @@ export class SharedFS { try { const off = this.inodeOffset(entry.ino); const oldMode = this.r32(off + INO_MODE); - this.w32(off + INO_MODE, (oldMode & S_IFMT) | (mode & 0o7777)); + const fileType = (mode & S_IFMT) || (oldMode & S_IFMT); + this.w32(off + INO_MODE, fileType | (mode & 0o7777)); this.w64(off + INO_CTIME, Date.now()); } finally { this.inodeWriteUnlock(entry.ino); diff --git a/host/src/vfs/time.ts b/host/src/vfs/time.ts index 0136cd676..4a9f82c6d 100644 --- a/host/src/vfs/time.ts +++ b/host/src/vfs/time.ts @@ -23,8 +23,8 @@ export class NodeTimeProvider implements TimeProvider { const elapsed = ns - this._startNs; return { sec: Number(elapsed / 1000000000n), nsec: Number(elapsed % 1000000000n) }; } - if (clockId === 1) { - // CLOCK_MONOTONIC + if (clockId === 1 || clockId === 7) { + // CLOCK_MONOTONIC / CLOCK_BOOTTIME return { sec: Number(ns / 1000000000n), nsec: Number(ns % 1000000000n) }; } // CLOCK_REALTIME — use hrtime + epoch offset for nanosecond resolution @@ -43,8 +43,8 @@ export class NodeTimeProvider implements TimeProvider { export class BrowserTimeProvider implements TimeProvider { clockGettime(clockId: number): { sec: number; nsec: number } { - if (clockId === 1 || clockId === 2 || clockId === 3) { - // CLOCK_MONOTONIC / CLOCK_PROCESS_CPUTIME_ID / CLOCK_THREAD_CPUTIME_ID + if (clockId === 1 || clockId === 2 || clockId === 3 || clockId === 7) { + // CLOCK_MONOTONIC / CPU-time clocks / CLOCK_BOOTTIME const ms = performance.now(); return { sec: Math.floor(ms / 1000), nsec: Math.floor((ms % 1000) * 1_000_000) }; } diff --git a/host/src/worker-adapter.ts b/host/src/worker-adapter.ts index 19c919f48..635337f2b 100644 --- a/host/src/worker-adapter.ts +++ b/host/src/worker-adapter.ts @@ -97,16 +97,43 @@ export class MockWorkerAdapter implements WorkerAdapter { // --- Node.js implementation --- -import { Worker } from "node:worker_threads"; +import { Worker, type WorkerOptions } from "node:worker_threads"; import { pathToFileURL } from "node:url"; import { createRequire } from "node:module"; import { existsSync } from "node:fs"; +// Wasm guest stacks consume the embedding worker's native stack when engines +// recurse through Wasm frames. Keep the default high enough for stack-heavy +// POSIX workloads while retaining an environment override for constrained +// embedders. +const DEFAULT_NODE_WORKER_STACK_SIZE_MB = 32; + function currentModuleUrl(): string { if (typeof __filename !== "undefined") return pathToFileURL(__filename).href; return import.meta.url; } +function nodeWorkerStackSizeMb(): number { + const raw = process.env.KANDELO_NODE_WORKER_STACK_SIZE_MB; + if (raw === undefined || raw === "") return DEFAULT_NODE_WORKER_STACK_SIZE_MB; + const parsed = Number(raw); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error(`invalid KANDELO_NODE_WORKER_STACK_SIZE_MB: ${raw}`); + } + return parsed; +} + +function nodeWorkerOptions(workerData: unknown, options: WorkerOptions = {}): WorkerOptions { + return { + ...options, + workerData, + resourceLimits: { + ...options.resourceLimits, + stackSizeMb: nodeWorkerStackSizeMb(), + }, + }; +} + export class NodeWorkerAdapter implements WorkerAdapter { private entryUrl: URL; private _compiledEntry: URL | false | undefined; @@ -153,7 +180,7 @@ export class NodeWorkerAdapter implements WorkerAdapter { // bootstrap which takes >500ms with 10+ concurrent workers). const compiledEntry = this.resolveCompiledEntry(); if (compiledEntry) { - const worker = new Worker(compiledEntry, { workerData }); + const worker = new Worker(compiledEntry, nodeWorkerOptions(workerData)); return new NodeWorkerHandle(worker); } @@ -169,10 +196,9 @@ export class NodeWorkerAdapter implements WorkerAdapter { `await import('${entryUrl}');`, ].join("\n"); - const worker = new Worker(bootstrap, { + const worker = new Worker(bootstrap, nodeWorkerOptions(workerData, { eval: true, - workerData, - }); + })); return new NodeWorkerHandle(worker); } } diff --git a/host/src/worker-main.ts b/host/src/worker-main.ts index 569ebe4ca..be1b768f5 100644 --- a/host/src/worker-main.ts +++ b/host/src/worker-main.ts @@ -204,6 +204,7 @@ function buildDlopenImports( getStackPointer: () => WebAssembly.Global | undefined, getInstance: () => WebAssembly.Instance | undefined, ptrWidth: 4 | 8, + longjmpTag?: WebAssembly.ExportValue, ): DlopenSupport { let linker: DynamicLinker | null = null; const loadedLibraries = new Map(); @@ -275,6 +276,7 @@ function buildDlopenImports( ]); const globalSymbols = new Map(); const inst = getInstance(); + const mainLongjmpTag = inst?.exports.__c_longjmp; if (inst) { for (const [name, exp] of Object.entries(inst.exports)) { if (RESERVED.has(name)) continue; @@ -292,6 +294,7 @@ function buildDlopenImports( globalSymbols, got: new Map(), loadedLibraries, + longjmpTag: mainLongjmpTag ?? longjmpTag, sideModuleFork: { setActiveFork: (state) => { activeSideFork = state; @@ -512,6 +515,15 @@ function buildDlopenImports( return { imports, replayDlopens, completeSideModuleForkUnwind, beginSideModuleForkRewind }; } +function createLongjmpTag(): WebAssembly.ExportValue | undefined { + const Tag = ( + WebAssembly as typeof WebAssembly & { + Tag?: new (descriptor: { parameters: string[] }) => WebAssembly.ExportValue; + } + ).Tag; + return Tag ? new Tag({ parameters: ["i32"] }) : undefined; +} + /** * Build import object for a Wasm module, stubbing unresolved imports. */ @@ -523,6 +535,12 @@ function buildImportObject( dlopenImports?: Record, getInstance?: () => WebAssembly.Instance | undefined, ptrWidth: 4 | 8 = 4, + postVmInterruptTimer?: ( + timedOutPtr: number, + vmInterruptPtr: number, + seconds: number, + ) => void, + longjmpTag?: WebAssembly.ExportValue, ): WebAssembly.Imports { const envImports: Record = { memory }; /** Convert wasm64 BigInt pointer to number (safe since addresses < 4GB) */ @@ -544,13 +562,9 @@ function buildImportObject( // llvm/lld ≥22 emit __c_longjmp as a tag import for setjmp users; instantiation fails silently without it. if (moduleImports.some(i => i.module === "env" && i.name === "__c_longjmp" && (i.kind as string) === "tag")) { - const Tag = ( - WebAssembly as typeof WebAssembly & { - Tag?: new (descriptor: { parameters: string[] }) => WebAssembly.ExportValue; - } - ).Tag; - if (Tag) { - envImports.__c_longjmp = new Tag({ parameters: ["i32"] }); + const tag = longjmpTag ?? createLongjmpTag(); + if (tag) { + envImports.__c_longjmp = tag; } } @@ -559,6 +573,23 @@ function buildImportObject( Object.assign(envImports, dlopenImports); } + if ( + moduleImports.some( + (i) => + i.module === "env" && + i.name === "__wasm_posix_vm_interrupt_after" && + i.kind === "function", + ) + ) { + envImports.__wasm_posix_vm_interrupt_after = ( + timedOutPtr: number | bigint, + vmInterruptPtr: number | bigint, + seconds: number | bigint, + ): void => { + postVmInterruptTimer?.(n(timedOutPtr), n(vmInterruptPtr), n(seconds)); + }; + } + // C++ operator new/delete fallbacks — delegate to the wasm instance's malloc/free. // Normally resolved by MariaDB's my_new.cc (USE_MYSYS_NEW), but kept as safety net. if (getInstance) { @@ -957,6 +988,7 @@ export async function centralizedWorkerMain( // Fork state — captured by kernel_fork closure let forkResult = 0; const forkBufAddr = channelOffset - FORK_BUF_SIZE; + const processLongjmpTag = createLongjmpTag(); if (hasForkInstrumentation) { // Override kernel_fork with fork-instrumentation-aware version. @@ -991,9 +1023,20 @@ export async function centralizedWorkerMain( () => processInstance?.exports.__stack_pointer as WebAssembly.Global | undefined, () => processInstance ?? undefined, ptrWidth, + processLongjmpTag, ); const importObject = buildImportObject(module, memory, kernelImports, channelOffset, dlopenSupport.imports, - () => processInstance ?? undefined, ptrWidth); + () => processInstance ?? undefined, ptrWidth, + (timedOutPtr, vmInterruptPtr, seconds) => { + port.postMessage({ + type: "vm_interrupt_timer", + pid, + timedOutPtr, + vmInterruptPtr, + seconds, + } satisfies WorkerToHostMessage); + }, + processLongjmpTag); const instance = await WebAssembly.instantiate(module, importObject); processInstance = instance; verifyProgramAbi(programBytes, initData.kernelAbiVersion, pid); @@ -1144,9 +1187,20 @@ export async function centralizedWorkerMain( () => processInstance?.exports.__stack_pointer as WebAssembly.Global | undefined, () => processInstance ?? undefined, ptrWidth, + processLongjmpTag, ); const importObject = buildImportObject(module, memory, kernelImports, channelOffset, dlopenSupport.imports, - () => processInstance ?? undefined, ptrWidth); + () => processInstance ?? undefined, ptrWidth, + (timedOutPtr, vmInterruptPtr, seconds) => { + port.postMessage({ + type: "vm_interrupt_timer", + pid, + timedOutPtr, + vmInterruptPtr, + seconds, + } satisfies WorkerToHostMessage); + }, + processLongjmpTag); const instance = await WebAssembly.instantiate(module, importObject); processInstance = instance; verifyProgramAbi(programBytes, initData.kernelAbiVersion, pid); @@ -1833,7 +1887,16 @@ export async function centralizedThreadWorkerMain( }; } const importObject = buildImportObject(module, memory, kernelImports, channelOffset, undefined, - () => threadInstance, ptrWidth); + () => threadInstance, ptrWidth, + (timedOutPtr, vmInterruptPtr, seconds) => { + port.postMessage({ + type: "vm_interrupt_timer", + pid, + timedOutPtr, + vmInterruptPtr, + seconds, + } satisfies WorkerToHostMessage); + }); const instance = new WebAssembly.Instance(module, importObject); threadInstance = instance; diff --git a/host/src/worker-protocol.ts b/host/src/worker-protocol.ts index b7c4b127c..c837a91ab 100644 --- a/host/src/worker-protocol.ts +++ b/host/src/worker-protocol.ts @@ -99,7 +99,8 @@ export type WorkerToHostMessage = | WorkerErrorMessage | ExecRequestMessage | ExecCompleteMessage - | AlarmSetMessage; + | AlarmSetMessage + | VmInterruptTimerMessage; export interface WorkerReadyMessage { type: "ready"; @@ -141,6 +142,14 @@ export interface AlarmSetMessage { seconds: number; } +export interface VmInterruptTimerMessage { + type: "vm_interrupt_timer"; + pid: number; + timedOutPtr: number; + vmInterruptPtr: number; + seconds: number; +} + export interface ExecReplyMessage { type: "exec_reply"; wasmBytes: ArrayBuffer; diff --git a/host/test/dylink.test.ts b/host/test/dylink.test.ts index c9ad7bc05..b0fdea2ea 100644 --- a/host/test/dylink.test.ts +++ b/host/test/dylink.test.ts @@ -31,6 +31,70 @@ function buildSharedLib(source: string, name: string): Uint8Array { return new Uint8Array(readFileSync(soPath)); } +function encodeVarUint(value: number): number[] { + const bytes: number[] = []; + do { + let byte = value & 0x7f; + value >>>= 7; + if (value !== 0) byte |= 0x80; + bytes.push(byte); + } while (value !== 0); + return bytes; +} + +function wasmString(value: string): number[] { + const bytes = [...new TextEncoder().encode(value)]; + return [...encodeVarUint(bytes.length), ...bytes]; +} + +function wasmSection(id: number, payload: number[]): number[] { + return [id, ...encodeVarUint(payload.length), ...payload]; +} + +function buildLongjmpThrowingSideModule(): Uint8Array { + const dylink = [ + ...wasmString("dylink.0"), + 1, 4, // WASM_DYLINK_MEM_INFO subsection, four zero LEB fields. + 0, 0, 0, 0, + ]; + const type = [ + 2, + 0x60, 1, 0x7f, 0, // tag type: (i32) -> () + 0x60, 0, 0, // exported throwLongjmp type: () -> () + ]; + const imports = [ + 1, + ...wasmString("env"), + ...wasmString("__c_longjmp"), + 4, 0, 0, // external kind tag, attribute 0, type index 0 + ]; + const functions = [1, 1]; // one function using type index 1 + const exports = [ + 1, + ...wasmString("throwLongjmp"), + 0, 0, // function export, function index 0 + ]; + const code = [ + 1, + 6, // body size + 0, // local decl count + 0x41, 7, // i32.const 7 + 0x08, 0, // throw imported tag 0 + 0x0b, + ]; + + return new Uint8Array([ + 0x00, 0x61, 0x73, 0x6d, + 0x01, 0x00, 0x00, 0x00, + ...wasmSection(0, dylink), + ...wasmSection(1, type), + ...wasmSection(2, imports), + ...wasmSection(3, functions), + ...wasmSection(7, exports), + ...wasmSection(10, code), + ]); +} + describe.skipIf(!hasCompiler())("dylink.0 parser", () => { it("parses a simple shared library", () => { const wasmBytes = buildSharedLib( @@ -74,6 +138,40 @@ describe.skipIf(!hasCompiler())("dylink.0 parser", () => { }); }); +describe.skipIf(typeof (WebAssembly as any).Tag !== "function")("shared wasm SjLj tag imports", () => { + it("uses the process-wide __c_longjmp tag for side modules", () => { + const Tag = (WebAssembly as any).Tag; + const wasmException = (WebAssembly as any).Exception; + const longjmpTag = new Tag({ parameters: ["i32"] }) as WebAssembly.ExportValue; + const memory = new WebAssembly.Memory({ initial: 1, maximum: 100, shared: true }); + const table = new WebAssembly.Table({ initial: 1, element: "anyfunc" }); + const stackPointer = new WebAssembly.Global( + { value: "i32", mutable: true }, + 65536, + ); + + const lib = loadSharedLibrarySync("libthrows-longjmp.so", buildLongjmpThrowingSideModule(), { + memory, + table, + stackPointer, + heapPointer: { value: 1024 }, + globalSymbols: new Map(), + got: new Map(), + loadedLibraries: new Map(), + longjmpTag, + }); + + try { + (lib.exports.throwLongjmp as Function)(); + expect.unreachable("throwLongjmp should throw the imported __c_longjmp tag"); + } catch (error) { + expect(error).toBeInstanceOf(wasmException); + expect((error as any).is(longjmpTag)).toBe(true); + expect((error as any).getArg(longjmpTag, 0)).toBe(7); + } + }); +}); + describe.skipIf(!hasCompiler())("shared library loading", () => { function createLoadOptions(): LoadSharedLibraryOptions { const memory = new WebAssembly.Memory({ initial: 1, maximum: 100, shared: true }); diff --git a/host/test/fetch-backend.test.ts b/host/test/fetch-backend.test.ts index f149e5f84..c3537d965 100644 --- a/host/test/fetch-backend.test.ts +++ b/host/test/fetch-backend.test.ts @@ -4,6 +4,7 @@ import { TlsNetworkBackend } from "../src/networking/tls-network-backend"; const encoder = new TextEncoder(); const decoder = new TextDecoder(); +const MSG_PEEK = 0x0002; afterEach(() => { vi.restoreAllMocks(); @@ -46,13 +47,25 @@ async function recvWhenReady( throw new Error("timed out waiting for response"); } +async function waitForReadable( + backend: Pick, + handle: number, +): Promise { + const deadline = Date.now() + 1_000; + while (Date.now() < deadline) { + if ((backend.poll(handle, 0x0001) & 0x0001) !== 0) return; + await new Promise((resolve) => setTimeout(resolve, 0)); + } + throw new Error("timed out waiting for readable poll"); +} + describe("FetchNetworkBackend", () => { afterEach(() => { vi.restoreAllMocks(); }); describe("getaddrinfo", () => { - it("returns a 4-byte address for any hostname", () => { + it("returns a 4-byte address for DNS names that can be deferred to fetch", () => { const backend = new FetchNetworkBackend(); const addr = backend.getaddrinfo("example.com"); expect(addr.length).toBe(4); @@ -83,6 +96,19 @@ describe("FetchNetworkBackend", () => { expect(() => backend.getaddrinfo(".toto.toto.toto")).toThrow("ENOENT"); expect(() => backend.getaddrinfo(`www.${"x".repeat(100)}.com`)).toThrow("ENOENT"); }); + + it("rejects names the browser resolver cannot truthfully synthesize", () => { + const backend = new FetchNetworkBackend(); + expect(() => backend.getaddrinfo("dummy-host-name")).toThrow("ENOENT"); + expect(() => backend.getaddrinfo("totes.invalid")).toThrow("ENOENT"); + }); + + it("allows explicitly aliased unqualified names", () => { + const backend = new FetchNetworkBackend({ + hostAliases: { registry: "registry.npmjs.org" }, + }); + expect(backend.getaddrinfo("registry").length).toBe(4); + }); }); describe("connect", () => { @@ -126,6 +152,32 @@ describe("FetchNetworkBackend", () => { }); }); + it("honors MSG_PEEK without consuming buffered response bytes", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(new Response("hello"))); + const backend = new FetchNetworkBackend(); + const addr = backend.getaddrinfo("example.com"); + backend.connect(1, addr, 80); + backend.send( + 1, + encoder.encode("GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"), + 0, + ); + + const first = decoder.decode(await recvWhenReady(backend, 1)); + expect(first).toContain("hello"); + + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(new Response("world"))); + backend.send( + 1, + encoder.encode("GET /2 HTTP/1.1\r\nHost: example.com\r\n\r\n"), + 0, + ); + await waitForReadable(backend, 1); + const peeked = decoder.decode(backend.recv(1, 4, MSG_PEEK)); + const consumed = decoder.decode(backend.recv(1, 4, 0)); + expect(peeked).toBe(consumed); + }); + describe("hostAliases", () => { it("rewrites the fetch target while preserving the request port", () => { const fetchMock = vi @@ -169,6 +221,19 @@ describe("TlsNetworkBackend HTTP proxy path", () => { expect(() => backend.getaddrinfo(".toto.toto.toto")).toThrow("ENOENT"); expect(() => backend.getaddrinfo(`www.${"x".repeat(100)}.com`)).toThrow("ENOENT"); }); + + it("rejects special-use invalid and unqualified names", () => { + const backend = new TlsNetworkBackend(); + expect(() => backend.getaddrinfo("dummy-host-name")).toThrow("ENOENT"); + expect(() => backend.getaddrinfo("totes.invalid")).toThrow("ENOENT"); + }); + + it("allows explicitly aliased unqualified names", () => { + const backend = new TlsNetworkBackend({ + dnsAliases: { registry: "https://registry.npmjs.org" }, + }); + expect(backend.getaddrinfo("registry").length).toBe(4); + }); }); it("resets response state for keep-alive HTTP requests", async () => { @@ -220,4 +285,26 @@ describe("TlsNetworkBackend HTTP proxy path", () => { expect(response.toLowerCase()).not.toContain("content-encoding"); expect(response.toLowerCase()).not.toContain("connection: close"); }); + + it("honors MSG_PEEK without consuming HTTP response bytes", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(new Response("peek-body"))); + const backend = new TlsNetworkBackend(); + const addr = backend.getaddrinfo("proxy.local"); + backend.connect(1, addr, 80); + + sendGet(backend, 1, "/peek"); + await recvWhenReady(backend, 1); + + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(new Response("second-body"))); + sendGet(backend, 1, "/peek2"); + await recvWhenReady(backend, 1); + + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(new Response("third-body"))); + sendGet(backend, 1, "/peek3"); + const peeked = decoder.decode((await recvWhenReady({ + recv: (handle, maxLen) => backend.recv(handle, maxLen, MSG_PEEK), + }, 1)).subarray(0, 8)); + const consumed = decoder.decode(backend.recv(1, 8, 0)); + expect(peeked).toBe(consumed); + }); }); diff --git a/host/test/ifhwaddr.test.ts b/host/test/ifhwaddr.test.ts index 8dba4b587..714dc400b 100644 --- a/host/test/ifhwaddr.test.ts +++ b/host/test/ifhwaddr.test.ts @@ -15,6 +15,8 @@ describe("SIOCGIFCONF / SIOCGIFHWADDR", () => { // Should find one interface named "eth0" expect(result.stdout).toContain("interfaces: 1"); expect(result.stdout).toContain("name: eth0"); + expect(result.stdout).toContain("index: 1"); + expect(result.stdout).toContain("index-name: eth0"); // MAC should be locally-administered and non-zero expect(result.stdout).toContain("locally-administered: yes"); diff --git a/host/test/node-host-vfs-only-metadata.test.ts b/host/test/node-host-vfs-only-metadata.test.ts index c5b0ee8b3..f0d3bc77e 100644 --- a/host/test/node-host-vfs-only-metadata.test.ts +++ b/host/test/node-host-vfs-only-metadata.test.ts @@ -272,6 +272,26 @@ describe.each(backendFactories)("%s", (_name, makeCase) => { }); }); +describe("HostFileSystem default virtual ownership", () => { + it("can present existing host-backed files as owned by a chosen guest uid/gid", () => { + const root = makeTempRoot("wasm-posix-host-fs-default-owner-"); + const native = join(root, "owned-by-mount"); + writeFileSync(native, "data"); + const before = statSync(native); + + const backend = new HostFileSystem(root, "/", { uid: 65534, gid: 65533 }); + const virtual = backend.stat("/owned-by-mount"); + expect(virtual.uid).toBe(65534); + expect(virtual.gid).toBe(65533); + + backend.chown("/owned-by-mount", 1000, 1001); + const changed = backend.stat("/owned-by-mount"); + expect(changed.uid).toBe(1000); + expect(changed.gid).toBe(1001); + expectNativeMetadataUnchanged(native, before); + }); +}); + describe("VirtualPlatformIO on Node host mounts", () => { it("routes metadata operations to HostFileSystem as VFS-only changes", () => { const root = makeTempRoot("wasm-posix-virtual-platform-vfs-only-"); diff --git a/host/test/select-timeout-retry.test.ts b/host/test/select-timeout-retry.test.ts index 4656d4845..752920a47 100644 --- a/host/test/select-timeout-retry.test.ts +++ b/host/test/select-timeout-retry.test.ts @@ -64,6 +64,45 @@ describe("centralized select/pselect timeout retries", () => { ); }); + it("preserves a finite poll deadline across retry wakes", () => { + vi.useFakeTimers(); + vi.setSystemTime(0); + + const processMemory = createSharedMemory(); + const worker = createWorkerHarness({}); + const channel = createChannel(42, processMemory); + worker.processes = new Map([ + [42, { pid: 42, memory: processMemory, channels: [channel], ptrWidth: 4 }], + ]); + worker.activeChannels = [channel]; + + const origArgs = [1024, 1, 10]; + worker.retrySyscall = vi.fn(() => { + worker.handleBlockingRetry(channel, ABI_SYSCALLS.Poll, origArgs); + }); + + worker.handleBlockingRetry(channel, ABI_SYSCALLS.Poll, origArgs); + expect(worker.completeChannel).not.toHaveBeenCalled(); + + vi.advanceTimersByTime(5); + worker.wakeAllBlockedRetries(); + expect(worker.completeChannel).not.toHaveBeenCalled(); + + vi.advanceTimersByTime(4); + expect(worker.completeChannel).not.toHaveBeenCalled(); + + vi.advanceTimersByTime(1); + expect(worker.completeChannel).toHaveBeenCalledWith( + channel, + ABI_SYSCALLS.Poll, + origArgs, + expect.anything(), + 0, + 0, + ); + expect(worker.pollRetryDeadlines.size).toBe(0); + }); + it("interrupts host-side epoll_pwait emulation when a handler signal is pending", () => { const kernelMemory = createSharedMemory(); const processMemory = createSharedMemory(); @@ -179,6 +218,7 @@ function createWorkerHarness(exports: Record): any { posixTimers: new Map(), pendingSleeps: new Map(), pendingPollRetries: new Map(), + pollRetryDeadlines: new Map(), pendingSelectRetries: new Map(), pendingPipeReaders: new Map(), pendingPipeWriters: new Map(), diff --git a/host/test/symlink.test.ts b/host/test/symlink.test.ts index f848dd677..a5674d40a 100644 --- a/host/test/symlink.test.ts +++ b/host/test/symlink.test.ts @@ -51,6 +51,22 @@ describe("symlink and lstat", () => { expect(st.mode & S_IFMT).toBe(S_IFLNK); }); + it("unlink removes a dangling symlink itself", () => { + const mfs = createMemfs(); + + const fd = mfs.open("/target.txt", O_WRONLY | O_CREAT | O_TRUNC, 0o644); + mfs.close(fd); + mfs.symlink("target.txt", "/link.txt"); + + mfs.unlink("/target.txt"); + expect(mfs.lstat("/link.txt").mode & S_IFMT).toBe(S_IFLNK); + + // POSIX unlink(2) unlinks the directory entry named by path. When path is + // a symlink, it removes the link inode and does not follow the target. + mfs.unlink("/link.txt"); + expect(() => mfs.lstat("/link.txt")).toThrow(); + }); + it("readlink returns the symlink target", () => { const mfs = createMemfs(); mfs.symlink("/some/path", "/mylink"); diff --git a/host/test/vfs.test.ts b/host/test/vfs.test.ts index a24d95a16..6c5829164 100644 --- a/host/test/vfs.test.ts +++ b/host/test/vfs.test.ts @@ -469,6 +469,64 @@ describe("MemoryFileSystem", () => { expect(entries).toContain("file.txt"); }); + it("reports raw inode numbers that remain representable after inode reuse", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_RDWR = 0x0002, + O_TRUNC = 0x0200; + + // SharedFS tracks an internal generation counter for reused inode slots. + // POSIX st_ino does not need to include that generation, and exposing it + // can overflow 32-bit guest language APIs while tools like ls(1) print the + // full kernel value. + for (let i = 0; i < 2_100; i++) { + const fd = mfs.open("/reuse.txt", O_CREAT | O_RDWR | O_TRUNC, 0o644); + mfs.close(fd); + mfs.unlink("/reuse.txt"); + } + + const fd = mfs.open("/reuse.txt", O_CREAT | O_RDWR | O_TRUNC, 0o644); + const stat = mfs.fstat(fd); + expect(stat.ino).toBeGreaterThan(0); + expect(stat.ino).toBeLessThanOrEqual(0x7fffffff); + + const dh = mfs.opendir("/"); + let entry; + let dirIno: number | null = null; + while ((entry = mfs.readdir(dh)) !== null) { + if (entry.name === "reuse.txt") { + dirIno = entry.ino; + break; + } + } + mfs.closedir(dh); + expect(dirIno).toBe(stat.ino); + mfs.close(fd); + }); + + it("honors O_CREAT|O_EXCL by failing when the final path already exists", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_WRONLY = 0x0001, + O_CREAT = 0x0040, + O_EXCL = 0x0080; + + const fd = mfs.open("/exclusive.txt", O_WRONLY | O_CREAT | O_EXCL, 0o600); + mfs.close(fd); + + expect(() => + mfs.open("/exclusive.txt", O_WRONLY | O_CREAT | O_EXCL, 0o600), + ).toThrow(/File exists/); + + // POSIX open(O_CREAT|O_EXCL) must fail with EEXIST when the final path is + // a symbolic link, even if the symlink points at an existing regular file. + mfs.symlink("/exclusive.txt", "/exclusive-link.txt"); + expect(() => + mfs.open("/exclusive-link.txt", O_WRONLY | O_CREAT | O_EXCL, 0o600), + ).toThrow(/File exists/); + }); + it("stat returns correct size after writing", () => { const sab = new SharedArrayBuffer(4 * 1024 * 1024); const mfs = MemoryFileSystem.create(sab); @@ -483,6 +541,37 @@ describe("MemoryFileSystem", () => { mfs.close(fd); }); + it("updates mtime and ctime after file writes and truncates", () => { + const now = vi.spyOn(Date, "now"); + try { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + now.mockReturnValue(1_000); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_RDWR = 0x0002, + O_TRUNC = 0x0200; + const fd = mfs.open("/timestamps.txt", O_CREAT | O_RDWR | O_TRUNC, 0o644); + const initial = mfs.fstat(fd); + + now.mockReturnValue(5_000); + mfs.write(fd, new TextEncoder().encode("abc"), null, 3); + const afterWrite = mfs.fstat(fd); + expect(afterWrite.mtimeMs).toBe(5_000); + expect(afterWrite.ctimeMs).toBe(5_000); + expect(afterWrite.mtimeMs).toBeGreaterThan(initial.mtimeMs); + + now.mockReturnValue(9_000); + mfs.ftruncate(fd, 1); + const afterTruncate = mfs.fstat(fd); + expect(afterTruncate.mtimeMs).toBe(9_000); + expect(afterTruncate.ctimeMs).toBe(9_000); + expect(afterTruncate.mtimeMs).toBeGreaterThan(afterWrite.mtimeMs); + mfs.close(fd); + } finally { + now.mockRestore(); + } + }); + it("unlink removes a file", () => { const sab = new SharedArrayBuffer(4 * 1024 * 1024); const mfs = MemoryFileSystem.create(sab); @@ -494,6 +583,115 @@ describe("MemoryFileSystem", () => { expect(() => mfs.stat("/todelete.txt")).toThrow(); }); + it("rejects unlink paths with a trailing slash on non-directories", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_WRONLY = 0x0001; + + const fd = mfs.open("/file.txt", O_CREAT | O_WRONLY, 0o644); + mfs.close(fd); + mfs.symlink("/file.txt", "/link.txt"); + + expect(() => mfs.unlink("/file.txt/")).toThrow(/Not a directory/); + expect(() => mfs.unlink("/link.txt/")).toThrow(/Not a directory/); + expect(mfs.stat("/file.txt").mode & 0xf000).toBe(0x8000); + expect(mfs.readlink("/link.txt")).toBe("/file.txt"); + }); + + it("rejects rename source paths that require a non-directory to be a directory", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_WRONLY = 0x0001; + + const fd = mfs.open("/file.txt", O_CREAT | O_WRONLY, 0o644); + mfs.close(fd); + + expect(() => mfs.rename("/file.txt/", "/renamed.txt")).toThrow( + /Not a directory/, + ); + expect(mfs.stat("/file.txt").size).toBe(0); + expect(() => mfs.stat("/renamed.txt")).toThrow(/No such file/); + }); + + it("preserves POSIX type checks when renaming directories onto existing paths", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_WRONLY = 0x0001; + + mfs.mkdir("/dir", 0o755); + const fd = mfs.open("/file.txt", O_CREAT | O_WRONLY, 0o644); + mfs.close(fd); + mfs.symlink("/file.txt", "/link.txt"); + + expect(() => mfs.rename("/dir", "/file.txt")).toThrow(/Not a directory/); + expect(() => mfs.rename("/dir", "/link.txt")).toThrow(/Not a directory/); + + expect(mfs.stat("/dir").mode & 0xf000).toBe(0x4000); + expect(mfs.stat("/file.txt").mode & 0xf000).toBe(0x8000); + expect(mfs.readlink("/link.txt")).toBe("/file.txt"); + }); + + it("renames directories over empty directories and updates dot-dot", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_WRONLY = 0x0001; + + mfs.mkdir("/old-parent", 0o755); + mfs.mkdir("/new-parent", 0o755); + mfs.mkdir("/old-parent/child", 0o755); + const siblingFd = mfs.open( + "/new-parent/sibling.txt", + O_CREAT | O_WRONLY, + 0o644, + ); + mfs.close(siblingFd); + + mfs.rename("/old-parent/child", "/new-parent/child"); + expect(mfs.stat("/new-parent/child/../sibling.txt").mode & 0xf000).toBe( + 0x8000, + ); + + mfs.mkdir("/empty-dest", 0o755); + mfs.rename("/new-parent/child", "/empty-dest"); + expect(mfs.stat("/empty-dest").mode & 0xf000).toBe(0x4000); + expect(() => mfs.stat("/new-parent/child")).toThrow(/No such file/); + }); + + it("keeps an unlinked open file alive until close", () => { + const sab = new SharedArrayBuffer(4 * 1024 * 1024); + const mfs = MemoryFileSystem.create(sab); + const O_CREAT = 0x0040, + O_RDWR = 0x0002, + O_TRUNC = 0x0200; + + const oldFd = mfs.open("/open.txt", O_CREAT | O_RDWR | O_TRUNC, 0o644); + const oldData = new TextEncoder().encode("old"); + mfs.write(oldFd, oldData, null, oldData.length); + mfs.unlink("/open.txt"); + expect(() => mfs.stat("/open.txt")).toThrow(); + + const newFd = mfs.open("/open.txt", O_CREAT | O_RDWR | O_TRUNC, 0o644); + const newData = new TextEncoder().encode("newer"); + mfs.write(newFd, newData, null, newData.length); + + mfs.seek(oldFd, 0, 0); + const oldBuf = new Uint8Array(8); + const oldRead = mfs.read(oldFd, oldBuf, null, oldBuf.length); + expect(new TextDecoder().decode(oldBuf.subarray(0, oldRead))).toBe("old"); + + mfs.seek(newFd, 0, 0); + const newBuf = new Uint8Array(8); + const newRead = mfs.read(newFd, newBuf, null, newBuf.length); + expect(new TextDecoder().decode(newBuf.subarray(0, newRead))).toBe("newer"); + + mfs.close(oldFd); + mfs.close(newFd); + }); + it("ftruncate changes file size", () => { const sab = new SharedArrayBuffer(4 * 1024 * 1024); const mfs = MemoryFileSystem.create(sab); @@ -695,4 +893,14 @@ describe("NodeTimeProvider", () => { const ns2 = BigInt(t2.sec) * 1_000_000_000n + BigInt(t2.nsec); expect(ns2).toBeGreaterThanOrEqual(ns1); }); + + it("treats CLOCK_BOOTTIME as monotonic-equivalent", () => { + const tp = new NodeTimeProvider(); + const monotonic = tp.clockGettime(1); + const boottime = tp.clockGettime(7); + const monotonicNs = BigInt(monotonic.sec) * 1_000_000_000n + BigInt(monotonic.nsec); + const boottimeNs = BigInt(boottime.sec) * 1_000_000_000n + BigInt(boottime.nsec); + expect(boottimeNs).toBeGreaterThanOrEqual(monotonicNs); + expect(boottimeNs - monotonicNs).toBeLessThan(100_000_000n); + }); }); diff --git a/host/test/virtual-network.test.ts b/host/test/virtual-network.test.ts index 279542030..993e3a190 100644 --- a/host/test/virtual-network.test.ts +++ b/host/test/virtual-network.test.ts @@ -9,6 +9,7 @@ import type { TcpConnectionPeer, UdpDatagram } from "../src/types"; const POLLIN = 0x0001; const POLLOUT = 0x0004; const POLLHUP = 0x0010; +const MSG_PEEK = 0x0002; describe("LocalVirtualNetwork", () => { it("routes TCP streams between attached machines", () => { @@ -35,6 +36,27 @@ describe("LocalVirtualNetwork", () => { expect(new TextDecoder().decode(client.recv(7, 16, 0))).toBe("pong"); }); + it("honors MSG_PEEK without consuming TCP stream data", () => { + const net = new LocalVirtualNetwork(); + const server = net.attachMachine({ id: "server", address: [10, 88, 0, 2] }); + const client = net.attachMachine({ id: "client", address: [10, 88, 0, 3] }); + let accepted: TcpConnectionPeer | null = null; + + expect(server.listenTcp!("srv:1", new Uint8Array([10, 88, 0, 2]), 8080, { + accept(peer) { + accepted = peer; + return 0; + }, + })).toBe(0); + + client.connect(7, new Uint8Array([10, 88, 0, 2]), 8080); + expect(accepted).not.toBeNull(); + accepted!.send(new TextEncoder().encode("peek-data"), 0); + + expect(new TextDecoder().decode(client.recv(7, 4, MSG_PEEK))).toBe("peek"); + expect(new TextDecoder().decode(client.recv(7, 9, 0))).toBe("peek-data"); + }); + it("reports refused TCP connects when no listener is bound", () => { const net = new LocalVirtualNetwork(); const client = net.attachMachine({ id: "client", address: [10, 88, 0, 3] }); diff --git a/images/vfs/scripts/shell-vfs-build.ts b/images/vfs/scripts/shell-vfs-build.ts index 969c554eb..513605121 100644 --- a/images/vfs/scripts/shell-vfs-build.ts +++ b/images/vfs/scripts/shell-vfs-build.ts @@ -175,8 +175,11 @@ function populateSystem(fs: MemoryFileSystem): void { "ssh\t\t22/tcp", "telnet\t\t23/tcp", "smtp\t\t25/tcp\t\tmail", + "nicname\t\t43/tcp\t\twhois", "domain\t\t53/tcp", "domain\t\t53/udp", + "gopher\t\t70/tcp", + "finger\t\t79/tcp", "http\t\t80/tcp\t\twww", "pop3\t\t110/tcp\t\tpop-3", "nntp\t\t119/tcp\t\treadnews untp", diff --git a/libc/musl-overlay/src/network/wasm32posix/if_indextoname.c b/libc/musl-overlay/src/network/wasm32posix/if_indextoname.c index f933623bc..2e2d3a462 100644 --- a/libc/musl-overlay/src/network/wasm32posix/if_indextoname.c +++ b/libc/musl-overlay/src/network/wasm32posix/if_indextoname.c @@ -5,7 +5,7 @@ char *if_indextoname(unsigned index, char *name) { if (index == 1) - return strncpy(name, "lo", IF_NAMESIZE); + return strncpy(name, "eth0", IF_NAMESIZE); errno = ENXIO; return 0; } diff --git a/libc/musl-overlay/src/network/wasm32posix/if_nameindex.c b/libc/musl-overlay/src/network/wasm32posix/if_nameindex.c index 72ca21704..6676a2883 100644 --- a/libc/musl-overlay/src/network/wasm32posix/if_nameindex.c +++ b/libc/musl-overlay/src/network/wasm32posix/if_nameindex.c @@ -4,11 +4,11 @@ struct if_nameindex *if_nameindex(void) { - /* Return a synthetic loopback interface */ + /* Return the kernel's synthetic virtual network interface. */ struct if_nameindex *idx = malloc(2 * sizeof(*idx)); if (!idx) return 0; idx[0].if_index = 1; - idx[0].if_name = strdup("lo"); + idx[0].if_name = strdup("eth0"); if (!idx[0].if_name) { free(idx); return 0; diff --git a/libc/musl-overlay/src/network/wasm32posix/if_nametoindex.c b/libc/musl-overlay/src/network/wasm32posix/if_nametoindex.c index 4c837e63a..0141c18e3 100644 --- a/libc/musl-overlay/src/network/wasm32posix/if_nametoindex.c +++ b/libc/musl-overlay/src/network/wasm32posix/if_nametoindex.c @@ -3,6 +3,6 @@ unsigned if_nametoindex(const char *name) { - if (!strcmp(name, "lo")) return 1; + if (!strcmp(name, "eth0")) return 1; return 0; } diff --git a/packages/registry/kandelo-sdk/build.toml b/packages/registry/kandelo-sdk/build.toml index 18fb0b38f..a993a28bc 100644 --- a/packages/registry/kandelo-sdk/build.toml +++ b/packages/registry/kandelo-sdk/build.toml @@ -26,7 +26,7 @@ inputs = [ ] repo_url = "https://github.com/Automattic/kandelo.git" commit = "UNPUBLISHED" -revision = 1 +revision = 2 [binary] index_url = "https://github.com/Automattic/kandelo/releases/download/binaries-abi-v{abi}/index.toml" diff --git a/packages/registry/php/test/browser/run-php.ts b/packages/registry/php/test/browser/run-php.ts index ac42a13db..b8dedbedd 100644 --- a/packages/registry/php/test/browser/run-php.ts +++ b/packages/registry/php/test/browser/run-php.ts @@ -43,7 +43,7 @@ async function runPhp( new SharedArrayBuffer(16 * 1024 * 1024, { maxByteLength: 64 * 1024 * 1024 }), 64 * 1024 * 1024, ); - for (const dir of ["/tmp", "/root", "/dev"]) ensureDir(memfs, dir); + for (const dir of ["/tmp", "/root", "/home", "/dev"]) ensureDir(memfs, dir); memfs.chmod("/tmp", 0o777); memfs.chmod("/root", 0o700); ensureDirRecursive(memfs, "/usr/local/bin"); diff --git a/programs/ifhwaddr.c b/programs/ifhwaddr.c index e30bab372..8bf2072d4 100644 --- a/programs/ifhwaddr.c +++ b/programs/ifhwaddr.c @@ -33,6 +33,12 @@ int main(void) { for (i = 0; i < n; i++) { printf("name: %s\n", ifr[i].ifr_name); + unsigned index = if_nametoindex(ifr[i].ifr_name); + printf("index: %u\n", index); + if (index != 0) { + char name[IF_NAMESIZE]; + printf("index-name: %s\n", if_indextoname(index, name) ? name : "(null)"); + } if (ioctl(fd, SIOCGIFHWADDR, &ifr[i]) < 0) { perror("SIOCGIFHWADDR"); diff --git a/scripts/run-php-upstream-node-chunks.sh b/scripts/run-php-upstream-node-chunks.sh index 0b22c454a..9c087275f 100755 --- a/scripts/run-php-upstream-node-chunks.sh +++ b/scripts/run-php-upstream-node-chunks.sh @@ -251,7 +251,7 @@ while [ "$offset" -lt "$total" ]; do --jobs "$jobs" \ --timeout "$timeout_ms" \ --host-reset-interval "$host_reset_interval" \ - "${extra_args[@]}" \ + ${extra_args[@]+"${extra_args[@]}"} \ --json \ > "$jsonl" 2> "$stderr" status=$? diff --git a/sdk/kandelo/bin/wasm32posix-cc b/sdk/kandelo/bin/wasm32posix-cc index 8fab8b8f8..7f283100c 100755 --- a/sdk/kandelo/bin/wasm32posix-cc +++ b/sdk/kandelo/bin/wasm32posix-cc @@ -45,6 +45,8 @@ GLUE_DIR="${WASM_POSIX_GLUE_DIR:-/usr/wasm32posix/glue}" GLUE_OBJ_DIR="${WASM_POSIX_GLUE_OBJ_DIR:-/usr/wasm32posix/glue-objects}" RESOURCE_DIR="${WASM_POSIX_CLANG_RESOURCE_DIR:-/usr/lib/llvm/lib/clang/21}" WASM_LD="$(find_tool wasm-ld || true)" +wasm_page_size=65536 +default_global_base=1114112 [[ -f "${SYSROOT}/lib/libc.a" ]] || die "missing ${SYSROOT}/lib/libc.a" [[ -f "${GLUE_DIR}/channel_syscall.c" ]] || die "missing ${GLUE_DIR}/channel_syscall.c" @@ -144,7 +146,6 @@ exe_link_flags=( -Wl,--shared-memory -Wl,--max-memory=1073741824 -Wl,--allow-undefined - -Wl,--global-base=1114112 -Wl,--table-base=3 -Wl,--export-table -Wl,--growable-table @@ -240,6 +241,44 @@ infer_thread_slots_decl() { printf '%s\n' "0" } +requested_stack_size_from_user_link_args() { + local requested="" idx token parsed + for ((idx = 0; idx < ${#user_link_args[@]}; idx++)); do + token="${user_link_args[$idx]}" + parsed="" + case "$token" in + stack-size=*) + parsed="${token#stack-size=}" + ;; + --stack-size=*) + parsed="${token#--stack-size=}" + ;; + stack-size) + if ((idx + 1 < ${#user_link_args[@]})); then + parsed="${user_link_args[$((idx + 1))]}" + fi + ;; + esac + if [[ "$parsed" =~ ^[0-9]+$ ]]; then + if [[ -z "$requested" || $((10#$parsed)) -gt $((10#$requested)) ]]; then + requested="$parsed" + fi + fi + done + printf '%s\n' "$requested" +} + +global_base_for_stack_size() { + local stack_size="${1:-}" global_base="$default_global_base" stack_with_guard + if [[ "$stack_size" =~ ^[0-9]+$ ]]; then + stack_with_guard=$(( ((10#$stack_size + wasm_page_size + wasm_page_size - 1) / wasm_page_size) * wasm_page_size )) + if ((stack_with_guard > global_base)); then + global_base="$stack_with_guard" + fi + fi + printf '%s\n' "$global_base" +} + append_ld_flags() { local flag part for flag in "$@"; do @@ -294,7 +333,9 @@ if [[ "$SDK_CXX" -eq 1 ]]; then stdlib_inputs+=("${SYSROOT}/lib/libc++.a" "${SYSROOT}/lib/libc++abi.a") fi -append_ld_flags "${exe_link_flags[@]}" +stack_size_request="$(requested_stack_size_from_user_link_args)" +exe_global_base="$(global_base_for_stack_size "$stack_size_request")" +append_ld_flags "${exe_link_flags[@]}" "-Wl,--global-base=${exe_global_base}" "$WASM_LD" -m wasm32 -L"${SYSROOT}/lib" \ "${object_inputs[@]}" "${compiled_objects[@]}" "${glue_inputs[@]}" \ "${SYSROOT}/lib/crt1.o" "${user_link_args[@]}" "${stdlib_inputs[@]}" "${SYSROOT}/lib/libc.a" \ diff --git a/sdk/src/bin/cc.ts b/sdk/src/bin/cc.ts index 33bd8d2c2..6fbfd3218 100755 --- a/sdk/src/bin/cc.ts +++ b/sdk/src/bin/cc.ts @@ -9,6 +9,7 @@ import { linkFlags, needsLinking, parseArgs, + requestedWasmStackSize, SHARED_LINK_FLAGS, THREAD_SLOT_USE_HOST_DEFAULT, threadSlotDeclarationDefine, @@ -85,7 +86,7 @@ export function buildClangArgs(userArgs: string[], toolchain: Toolchain, arch: W args.push( join(toolchain.sysroot, 'lib', 'crt1.o'), join(toolchain.sysroot, 'lib', 'libc.a'), - ...linkFlags(arch), + ...linkFlags(arch, { stackSizeBytes: requestedWasmStackSize(parsed.otherArgs) }), ); } } diff --git a/sdk/src/lib/flags.ts b/sdk/src/lib/flags.ts index 9b2876d15..287a74b3f 100644 --- a/sdk/src/lib/flags.ts +++ b/sdk/src/lib/flags.ts @@ -20,7 +20,75 @@ export function compileFlags(arch: WasmArch): string[] { ]; } -export function linkFlags(arch: WasmArch): string[] { +const WASM_PAGE_SIZE = 65536; +const DEFAULT_GLOBAL_BASE = 1114112; + +export interface LinkFlagOptions { + stackSizeBytes?: number | null; +} + +function alignUp(value: number, alignment: number): number { + return Math.ceil(value / alignment) * alignment; +} + +export function globalBaseForStackSize(stackSizeBytes: number | null | undefined): number { + if (stackSizeBytes === null || stackSizeBytes === undefined) return DEFAULT_GLOBAL_BASE; + if (!Number.isSafeInteger(stackSizeBytes) || stackSizeBytes < 0) return DEFAULT_GLOBAL_BASE; + const stackWithGuard = alignUp(stackSizeBytes + WASM_PAGE_SIZE, WASM_PAGE_SIZE); + return Math.max(DEFAULT_GLOBAL_BASE, stackWithGuard); +} + +function parseStackSizeToken(token: string): number | null { + const match = /^(?:--)?stack-size=(\d+)$/.exec(token); + if (!match) return null; + const parsed = Number(match[1]); + if (!Number.isSafeInteger(parsed) || parsed < 0) return null; + return parsed; +} + +export function requestedWasmStackSize(args: string[]): number | null { + const linkerArgs: string[] = []; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + if (arg.startsWith('-Wl,')) { + linkerArgs.push(...arg.slice('-Wl,'.length).split(',')); + } else if (arg === '-Xlinker' && i + 1 < args.length) { + i++; + linkerArgs.push(args[i]); + } else if (arg.startsWith('-Xlinker=')) { + linkerArgs.push(arg.slice('-Xlinker='.length)); + } else if (arg === '-z' || arg.startsWith('--stack-size=')) { + linkerArgs.push(arg); + } else if (/^stack-size(?:=|$)/.test(arg)) { + linkerArgs.push(arg); + } + } + + let requested: number | null = null; + for (let i = 0; i < linkerArgs.length; i++) { + const token = linkerArgs[i]; + const parsed = parseStackSizeToken(token); + if (parsed !== null) { + requested = Math.max(requested ?? 0, parsed); + continue; + } + if (token === 'stack-size' && i + 1 < linkerArgs.length) { + const value = linkerArgs[i + 1]; + if (/^\d+$/.test(value)) { + const parsedValue = Number(value); + if (Number.isSafeInteger(parsedValue)) { + requested = Math.max(requested ?? 0, parsedValue); + } + } + } + } + + return requested; +} + +export function linkFlags(arch: WasmArch, options: LinkFlagOptions = {}): string[] { + const globalBase = globalBaseForStackSize(options.stackSizeBytes); return [ '-nostdlib', '-Wl,--entry=_start', @@ -30,7 +98,7 @@ export function linkFlags(arch: WasmArch): string[] { '-Wl,--shared-memory', '-Wl,--max-memory=1073741824', '-Wl,--allow-undefined', - '-Wl,--global-base=1114112', + `-Wl,--global-base=${globalBase}`, '-Wl,--table-base=3', '-Wl,--export-table', '-Wl,--growable-table', diff --git a/sdk/test/cc.test.ts b/sdk/test/cc.test.ts index 0e9716e6f..dc493daa8 100644 --- a/sdk/test/cc.test.ts +++ b/sdk/test/cc.test.ts @@ -72,4 +72,11 @@ describe('buildClangArgs', () => { expect(args).toContain('-DWASM_POSIX_THREAD_SLOT_DECL=2'); expect(args).not.toContain('--kandelo-thread-slots=2'); }); + + it('raises global base when a link requests a larger wasm stack', () => { + const args = buildClangArgs(['foo.c', '-o', 'foo.wasm', '-Wl,-z,stack-size=4194304'], toolchain); + expect(args).toContain('-Wl,-z,stack-size=4194304'); + expect(args).toContain('-Wl,--global-base=4259840'); + expect(args).not.toContain('-Wl,--global-base=1114112'); + }); }); diff --git a/sdk/test/flags.test.ts b/sdk/test/flags.test.ts index ad3165cb4..6463ac986 100644 --- a/sdk/test/flags.test.ts +++ b/sdk/test/flags.test.ts @@ -2,10 +2,13 @@ import { describe, it, expect } from 'vitest'; import { COMPILE_FLAGS, filterArgs, + globalBaseForStackSize, inferThreadSlotDeclaration, LINK_FLAGS, + linkFlags, needsLinking, parseArgs, + requestedWasmStackSize, THREAD_SLOT_NONE, THREAD_SLOT_USE_HOST_DEFAULT, } from '../src/lib/flags.ts'; @@ -160,6 +163,34 @@ describe('LINK_FLAGS', () => { expect(LINK_FLAGS).toContain('-Wl,--import-memory'); expect(LINK_FLAGS).toContain('-Wl,--shared-memory'); }); + + it('keeps the default global base for default-sized stacks', () => { + expect(globalBaseForStackSize(null)).toBe(1114112); + expect(globalBaseForStackSize(1048576)).toBe(1114112); + expect(linkFlags('wasm32')).toContain('-Wl,--global-base=1114112'); + }); + + it('raises the global base for larger wasm stack reservations', () => { + expect(globalBaseForStackSize(4194304)).toBe(4259840); + expect(linkFlags('wasm32', { stackSizeBytes: 4194304 })) + .toContain('-Wl,--global-base=4259840'); + }); +}); + +describe('requestedWasmStackSize', () => { + it('detects wasm-ld stack-size flags passed through clang', () => { + expect(requestedWasmStackSize(['-Wl,-z,stack-size=4194304'])).toBe(4194304); + expect(requestedWasmStackSize(['-Xlinker', '-z', '-Xlinker', 'stack-size=2097152'])) + .toBe(2097152); + expect(requestedWasmStackSize(['-Wl,--stack-size=3145728'])).toBe(3145728); + }); + + it('returns the largest stack-size request if repeated', () => { + expect(requestedWasmStackSize([ + '-Wl,-z,stack-size=1048576', + '-Wl,-z,stack-size=4194304', + ])).toBe(4194304); + }); }); describe('inferThreadSlotDeclaration', () => {