From 6ecec9667f0aa20b6753e726a36defd766bb6ddc Mon Sep 17 00:00:00 2001 From: Kandelo Agent Date: Thu, 11 Jun 2026 13:39:06 +0000 Subject: [PATCH 1/8] fix: isolate PHPT workers and preserve process status (cherry picked from commit d2011d8459221458404bc49abccf5195dd3905c8) --- abi/snapshot.json | 19 ++- crates/kernel/src/fork.rs | 2 + crates/kernel/src/process.rs | 5 + crates/kernel/src/process_table.rs | 33 +++- crates/kernel/src/syscalls.rs | 3 +- crates/kernel/src/wasm_api.rs | 81 ++++++++-- host/src/browser-kernel-host.ts | 7 + host/src/browser-kernel-protocol.ts | 2 + host/src/browser-kernel-worker-entry.ts | 12 +- host/src/kernel-worker.ts | 198 ++++++++++++++++++++---- host/src/node-kernel-host.ts | 3 + host/src/node-kernel-protocol.ts | 2 + host/src/node-kernel-worker-entry.ts | 13 +- host/test/multi-worker.test.ts | 76 ++++++++- host/test/select-timeout-retry.test.ts | 126 +++++++++++++++ 15 files changed, 520 insertions(+), 62 deletions(-) create mode 100644 host/test/select-timeout-retry.test.ts diff --git a/abi/snapshot.json b/abi/snapshot.json index fd1b81280..99678a2f2 100644 --- a/abi/snapshot.json +++ b/abi/snapshot.json @@ -944,6 +944,11 @@ "name": "kernel_madvise", "signature": "(i32,i32,i32) -> (i32)" }, + { + "kind": "func", + "name": "kernel_mark_process_exited", + "signature": "(i32,i32) -> (i32)" + }, { "kind": "func", "name": "kernel_mark_process_signaled", @@ -1092,7 +1097,7 @@ { "kind": "func", "name": "kernel_preadv", - "signature": "(i32,i32,i32,i32,i32) -> (i32)" + "signature": "(i32,i32,i32,i64) -> (i32)" }, { "kind": "func", @@ -1132,7 +1137,7 @@ { "kind": "func", "name": "kernel_pwritev", - "signature": "(i32,i32,i32,i32,i32) -> (i32)" + "signature": "(i32,i32,i32,i64) -> (i32)" }, { "kind": "func", @@ -1309,6 +1314,11 @@ "name": "kernel_set_cwd", "signature": "(i32,i32,i32) -> (i32)" }, + { + "kind": "func", + "name": "kernel_set_fd_pipe", + "signature": "(i32,i32) -> (i32)" + }, { "kind": "func", "name": "kernel_set_fork_exec", @@ -1349,6 +1359,11 @@ "name": "kernel_set_stdin_pipe", "signature": "(i32) -> (i32)" }, + { + "kind": "func", + "name": "kernel_set_stdio_pipe", + "signature": "(i32,i32) -> (i32)" + }, { "kind": "func", "name": "kernel_set_tid_address", diff --git a/crates/kernel/src/fork.rs b/crates/kernel/src/fork.rs index 087c47e4b..4f4925423 100644 --- a/crates/kernel/src/fork.rs +++ b/crates/kernel/src/fork.rs @@ -1176,6 +1176,7 @@ pub fn deserialize_fork_state(buf: &[u8], child_pid: u32) -> Result Result { is_session_leader, state: ProcessState::Running, exit_status: 0, + exit_signal: 0, fd_table, ofd_table, lock_table: LockTable::new(), diff --git a/crates/kernel/src/process.rs b/crates/kernel/src/process.rs index 1c7ecd0ab..d51934b75 100644 --- a/crates/kernel/src/process.rs +++ b/crates/kernel/src/process.rs @@ -511,7 +511,11 @@ pub struct Process { /// POSIX uses this flag (not `sid == pid`) to gate setpgid EPERM checks. pub is_session_leader: bool, pub state: ProcessState, + /// Low 8-bit status supplied to _exit()/exit_group() for normal exits. + /// POSIX wait status encoding keeps normal exit codes 0..255 distinct + /// from signal termination; `exit_signal != 0` records the latter. pub exit_status: i32, + pub exit_signal: u32, pub fd_table: FdTable, pub ofd_table: OfdTable, pub lock_table: LockTable, @@ -627,6 +631,7 @@ impl Process { is_session_leader: false, state: ProcessState::Running, exit_status: 0, + exit_signal: 0, fd_table, ofd_table, lock_table: LockTable::new(), diff --git a/crates/kernel/src/process_table.rs b/crates/kernel/src/process_table.rs index 337d1d8d5..266ef9193 100644 --- a/crates/kernel/src/process_table.rs +++ b/crates/kernel/src/process_table.rs @@ -475,6 +475,7 @@ impl ProcessTable { limbo.is_session_leader = proc.is_session_leader; limbo.state = ProcessState::Limbo; limbo.exit_status = proc.exit_status; + limbo.exit_signal = proc.exit_signal; limbo.cwd = proc.cwd.clone(); limbo.environ = proc.environ.clone(); limbo.argv = proc.argv.clone(); @@ -858,7 +859,8 @@ impl ProcessTable { pub fn mark_process_signaled(&mut self, pid: u32, signum: u32) -> Result<(), Errno> { let proc = self.processes.get_mut(&pid).ok_or(Errno::ESRCH)?; proc.state = ProcessState::Exited; - proc.exit_status = 128 + signum as i32; + proc.exit_status = 0; + proc.exit_signal = signum & 0x7f; Ok(()) } @@ -888,7 +890,7 @@ impl ProcessTable { if child.state == ProcessState::Exited { return Ok(Some(( child_pid, - Self::wait_status_from_exit_status(child.exit_status), + Self::wait_status_from_process(child), ))); } } @@ -929,11 +931,11 @@ impl ProcessTable { child.pgid == target_pgid } - fn wait_status_from_exit_status(exit_status: i32) -> i32 { - if exit_status >= 128 { - (exit_status - 128) & 0x7f + fn wait_status_from_process(proc: &Process) -> i32 { + if proc.exit_signal != 0 { + (proc.exit_signal as i32) & 0x7f } else { - (exit_status & 0xff) << 8 + (proc.exit_status & 0xff) << 8 } } } @@ -947,7 +949,7 @@ mod wait_tests { let mut table = ProcessTable::new(); let first_pid = table.allocate_spawn_pid(); - table.processes.insert(first_pid, Process::new(first_pid)); + table.processes.insert(first_pid, Process::new_boxed(first_pid)); table.processes.remove(&first_pid); let second_pid = table.allocate_spawn_pid(); @@ -1089,6 +1091,23 @@ mod tests { ); } + #[test] + fn poll_waitable_child_preserves_high_normal_exit_status() { + let mut table = ProcessTable::new(); + table.create_process(10).unwrap(); + table.create_process(11).unwrap(); + let child = table.processes.get_mut(&11).unwrap(); + child.ppid = 10; + child.state = ProcessState::Exited; + child.exit_status = 255; + child.exit_signal = 0; + + assert_eq!( + table.poll_waitable_child(10, -1).unwrap(), + Some((11, 255 << 8)) + ); + } + #[test] fn poll_waitable_child_encodes_signal_status() { let mut table = ProcessTable::new(); diff --git a/crates/kernel/src/syscalls.rs b/crates/kernel/src/syscalls.rs index 2a476f8dc..e03696bdb 100644 --- a/crates/kernel/src/syscalls.rs +++ b/crates/kernel/src/syscalls.rs @@ -5452,7 +5452,8 @@ pub fn sys_exit(proc: &mut Process, host: &mut dyn HostIO, status: i32) { fallback_lock_table(proc).remove_all_for_pid(pid); proc.state = ProcessState::Exited; - proc.exit_status = status; + proc.exit_status = status & 0xff; + proc.exit_signal = 0; } /// Get the current time from the specified clock. diff --git a/crates/kernel/src/wasm_api.rs b/crates/kernel/src/wasm_api.rs index ff7c0d18a..5730eccd0 100644 --- a/crates/kernel/src/wasm_api.rs +++ b/crates/kernel/src/wasm_api.rs @@ -1188,6 +1188,19 @@ fn ensure_memory_covers(_end_addr: usize) { // No-op on non-Wasm targets (tests) } +fn terminate_process_by_signal( + proc: &mut crate::process::Process, + host: &mut WasmHostIO, + signum: u32, +) { + proc.sigsuspend_saved_mask = None; + for t in proc.threads.iter_mut() { + t.signals.sigsuspend_saved_mask = None; + } + crate::syscalls::sys_exit(proc, host, 0); + proc.exit_signal = signum & 0x7f; +} + // 3c. Signal delivery at syscall boundaries // --------------------------------------------------------------------------- @@ -1195,7 +1208,6 @@ fn ensure_memory_covers(_end_addr: usize) { fn deliver_pending_signals(proc: &mut Process, host: &mut WasmHostIO) { use crate::signal::{DefaultAction, SignalHandler, default_action}; let tid = crate::process_table::current_tid(); - let _ = host; loop { // Caught signals are delivered by the glue code via // kernel_dequeue_signal; default and ignored signals are consumed here. @@ -1214,8 +1226,7 @@ fn deliver_pending_signals(proc: &mut Process, host: &mut WasmHostIO) { let _ = dequeue_signal_for(proc, tid, signum); match default_action(signum) { DefaultAction::Terminate | DefaultAction::CoreDump => { - proc.state = crate::process::ProcessState::Exited; - proc.exit_status = 128 + signum as i32; + terminate_process_by_signal(proc, host, signum); } _ => {} } @@ -1460,11 +1471,37 @@ pub extern "C" fn kernel_set_process_argv(pid: u32, data_ptr: *const u8, data_le /// Returns 0 on success, -ESRCH if pid not found. #[unsafe(no_mangle)] pub extern "C" fn kernel_set_stdin_pipe(pid: u32) -> i32 { + kernel_set_stdio_pipe(pid, 0) +} + +/// Mark one of a process's standard descriptors as a host-backed pipe. +/// +/// Hosts use this when they connect stdin/stdout/stderr to capture pipes +/// rather than a terminal. The descriptor continues to use its existing +/// host handle (0, 1, or 2), so reads and writes still delegate to the host +/// stdio callbacks, but POSIX-visible metadata changes from character device +/// to FIFO: isatty() returns ENOTTY and fstat() reports S_IFIFO. +/// Returns 0 on success, -EINVAL for non-stdio fds, -ESRCH if pid not found. +#[unsafe(no_mangle)] +pub extern "C" fn kernel_set_stdio_pipe(pid: u32, fd: i32) -> i32 { + if !(0..=2).contains(&fd) { + return -(Errno::EINVAL as i32); + } let table = unsafe { &mut *PROCESS_TABLE.0.get() }; if let Some(proc) = table.get_mut(pid) { - // Change OFD 0 (stdin) from CharDevice to Pipe - if let Some(ofd) = proc.ofd_table.get_mut(0) { - ofd.file_type = crate::ofd::FileType::Pipe; + if let Ok(entry) = proc.fd_table.get(fd) { + let ofd_idx = entry.ofd_ref.0; + if let Some(ofd) = proc.ofd_table.get_mut(ofd_idx) { + ofd.host_handle = fd as i64; + ofd.path = match fd { + 0 => b"/dev/stdin".to_vec(), + 1 => b"/dev/stdout".to_vec(), + _ => b"/dev/stderr".to_vec(), + }; + ofd.file_type = crate::ofd::FileType::Pipe; + } + } else { + return -(Errno::EBADF as i32); } 0 } else { @@ -1472,6 +1509,12 @@ pub extern "C" fn kernel_set_stdin_pipe(pid: u32) -> i32 { } } +/// Backwards-compatible alias for older host code. +#[unsafe(no_mangle)] +pub extern "C" fn kernel_set_fd_pipe(pid: u32, fd: i32) -> i32 { + kernel_set_stdio_pipe(pid, fd) +} + fn finish_removed_process(pid: u32, result: crate::process_table::RemoveProcessResult) { use core::sync::atomic::Ordering; @@ -1643,13 +1686,21 @@ pub extern "C" fn kernel_clear_fork_child(pid: u32) -> i32 { } } -/// Get process exit status. -/// Returns exit_status if process is exited, -1 if still alive, -ESRCH if not found. +/// Get process exit status (centralized mode). +/// Returns the shell-style status used by host-side kill scans: normal exit +/// code for regular exits, 128+signal for signal termination, -1 if still +/// alive, or -ESRCH if not found. #[unsafe(no_mangle)] pub extern "C" fn kernel_get_process_exit_status(pid: u32) -> i32 { let table = unsafe { &*PROCESS_TABLE.0.get() }; match table.get(pid) { - Some(proc) if proc.state == crate::process::ProcessState::Exited => proc.exit_status, + Some(proc) if proc.state == crate::process::ProcessState::Exited => { + if proc.exit_signal != 0 { + 128 + proc.exit_signal as i32 + } else { + proc.exit_status + } + } Some(_) => -1, None => -(Errno::ESRCH as i32), } @@ -2116,13 +2167,8 @@ pub extern "C" fn kernel_dequeue_signal(pid: u32, out_ptr: *mut u8) -> i32 { let _ = dequeue_signal_for(proc, tid, signum); match default_action(signum) { DefaultAction::Terminate | DefaultAction::CoreDump => { - // Process is dying; clear sigsuspend state - proc.sigsuspend_saved_mask = None; - for t in proc.threads.iter_mut() { - t.signals.sigsuspend_saved_mask = None; - } - proc.state = crate::process::ProcessState::Exited; - proc.exit_status = 128 + signum as i32; + let mut host = WasmHostIO; + terminate_process_by_signal(proc, &mut host, signum); return 0; } _ => continue, @@ -6930,7 +6976,8 @@ pub extern "C" fn kernel_exit(status: i32) -> ! { if unsafe { host_is_thread_worker() } != 0 { // Thread exit: don't destroy shared process state (FDs, pipes, etc.). // Just set exit status and return — the glue will trap via unreachable. - proc.exit_status = status; + proc.exit_status = status & 0xff; + proc.exit_signal = 0; // Drop GKL guard before trapping } else { let mut host = WasmHostIO; diff --git a/host/src/browser-kernel-host.ts b/host/src/browser-kernel-host.ts index f15566ee4..083b5065f 100644 --- a/host/src/browser-kernel-host.ts +++ b/host/src/browser-kernel-host.ts @@ -115,6 +115,8 @@ export interface BrowserKernelBootOptions { pty?: boolean; /** Initial stdin bytes (with implicit EOF). */ stdin?: Uint8Array; + /** Stdio fds (0, 1, 2) that should be host-backed pipes, not terminals. */ + pipeStdio?: number[]; } export class BrowserKernel { @@ -421,6 +423,7 @@ export class BrowserKernel { gid: options.gid, pty: options.pty, stdin: options.stdin, + pipeStdio: options.pipeStdio, maxPages: this.maxPages, }) as number; @@ -465,6 +468,7 @@ export class BrowserKernel { env?: string[]; cwd?: string; stdin?: Uint8Array; + pipeStdio?: number[]; pty?: boolean; uid?: number; gid?: number; @@ -497,6 +501,7 @@ export class BrowserKernel { ptyCols: options?.ptyCols, ptyRows: options?.ptyRows, stdin: options?.stdin, + pipeStdio: options?.pipeStdio, maxPages: this.maxPages, }, [bytesToSend]); @@ -538,6 +543,7 @@ export class BrowserKernel { gid?: number; pty?: boolean; stdin?: Uint8Array; + pipeStdio?: number[]; ptyCols?: number; ptyRows?: number; }, @@ -556,6 +562,7 @@ export class BrowserKernel { ptyCols: options?.ptyCols, ptyRows: options?.ptyRows, stdin: options?.stdin, + pipeStdio: options?.pipeStdio, maxPages: this.maxPages, }) as number; diff --git a/host/src/browser-kernel-protocol.ts b/host/src/browser-kernel-protocol.ts index b64d202cb..ce9521b5f 100644 --- a/host/src/browser-kernel-protocol.ts +++ b/host/src/browser-kernel-protocol.ts @@ -89,6 +89,8 @@ export interface SpawnMessage { ptyCols?: number; ptyRows?: number; stdin?: Uint8Array; + /** Stdio fds (0, 1, 2) that should be host-backed pipes, not terminals. */ + pipeStdio?: number[]; maxPages?: number; } diff --git a/host/src/browser-kernel-worker-entry.ts b/host/src/browser-kernel-worker-entry.ts index fa3b2fc86..916ae4686 100644 --- a/host/src/browser-kernel-worker-entry.ts +++ b/host/src/browser-kernel-worker-entry.ts @@ -903,9 +903,15 @@ async function handleSpawn(msg: Extract) if (msg.ptyCols != null && msg.ptyRows != null) { kernelWorker.ptySetWinsize(ptyIdx, msg.ptyRows, msg.ptyCols); } - } else if (msg.stdin) { - const stdinData = msg.stdin instanceof Uint8Array ? msg.stdin : new Uint8Array(msg.stdin); - kernelWorker.setStdinData(pid, stdinData); + } else { + if (msg.pipeStdio) { + kernelWorker.setStdioPipes(pid, msg.pipeStdio); + } + if (msg.stdin) { + const stdinData = + msg.stdin instanceof Uint8Array ? msg.stdin : new Uint8Array(msg.stdin); + kernelWorker.setStdinData(pid, stdinData); + } } const initData: CentralizedWorkerInitMessage = { diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index d22c521b7..f86729b4d 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -97,6 +97,9 @@ const FORK_BUF_SIZE = FORK_SAVE_BUFFER_SIZE; /** Errno values */ const EAGAIN = 11; +const EFAULT = 14; +const EEXIST = 17; +const ENAMETOOLONG = 36; const ETIMEDOUT = 110; const EINTR_ERRNO = 4; @@ -1185,6 +1188,27 @@ export class CentralizedKernelWorker { } } + /** + * Mark selected stdio descriptors as host-backed pipes rather than + * terminal character devices. Reads/writes still use host handles 0/1/2, + * but POSIX-visible metadata changes so isatty() fails with ENOTTY and + * fstat() reports FIFO semantics. + */ + setStdioPipes(pid: number, fds: number[]): void { + if (!this.kernelInstance) return; + const kernelSetStdioPipe = + (this.kernelInstance.exports.kernel_set_stdio_pipe ?? + this.kernelInstance.exports.kernel_set_fd_pipe) as + | ((pid: number, fd: number) => number) + | undefined; + if (!kernelSetStdioPipe) return; + for (const fd of fds) { + if (fd >= 0 && fd <= 2) { + kernelSetStdioPipe(pid, fd); + } + } + } + /** * Set stdout/stderr capture callbacks on the underlying kernel instance. * Must be called after construction but works at any time. @@ -1837,6 +1861,96 @@ export class CentralizedKernelWorker { return new TextDecoder("utf-8", { fatal: false }).decode(copy); } + private writeKernelScratchString(value: string, offset: number): { ptr: number; len: number } { + const encoded = new TextEncoder().encode(value); + const ptr = this.scratchOffset + offset; + const kernelMem = this.getKernelMem(); + if (offset + encoded.length + 1 > CH_DATA_SIZE) { + throw new Error(`kernel scratch string too large (${encoded.length} bytes)`); + } + kernelMem.set(encoded, ptr); + kernelMem[ptr + encoded.length] = 0; + return { ptr, len: encoded.length }; + } + + private unsetProcessEnv(pid: number, name: string): void { + const setCurrentPid = this.kernelInstance!.exports.kernel_set_current_pid as + ((pid: number) => void) | undefined; + const unsetEnv = this.kernelInstance!.exports.kernel_unsetenv as + ((namePtr: KernelPointer, nameLen: number) => number) | undefined; + if (!setCurrentPid || !unsetEnv) return; + + const nameBuf = this.writeKernelScratchString(name, 0); + setCurrentPid(pid); + unsetEnv(this.toKernelPtr(nameBuf.ptr), nameBuf.len); + } + + private setProcessEnv(pid: number, entry: string): void { + const eq = entry.indexOf("="); + if (eq <= 0) return; + + const setCurrentPid = this.kernelInstance!.exports.kernel_set_current_pid as + ((pid: number) => void) | undefined; + const setEnv = this.kernelInstance!.exports.kernel_setenv as + ((namePtr: KernelPointer, nameLen: number, valuePtr: KernelPointer, valueLen: number, overwrite: number) => number) | undefined; + if (!setCurrentPid || !setEnv) return; + + const name = entry.slice(0, eq); + const value = entry.slice(eq + 1); + const nameBuf = this.writeKernelScratchString(name, 0); + const valueBuf = this.writeKernelScratchString(value, nameBuf.len + 1); + setCurrentPid(pid); + setEnv(this.toKernelPtr(nameBuf.ptr), nameBuf.len, this.toKernelPtr(valueBuf.ptr), valueBuf.len, 1); + } + + private replaceProcessEnvironment(pid: number, env: string[]): void { + for (const entry of this.snapshotProcessEnv(pid)) { + const eq = entry.indexOf("="); + if (eq > 0) this.unsetProcessEnv(pid, entry.slice(0, eq)); + } + for (const entry of env) { + this.setProcessEnv(pid, entry); + } + } + + private snapshotCurrentProcessStrings( + pid: number, + countExport: "kernel_get_argc" | "kernel_environ_count", + readExport: "kernel_argv_read" | "kernel_environ_get", + ): string[] { + const setCurrentPid = this.kernelInstance!.exports.kernel_set_current_pid as + ((pid: number) => void) | undefined; + const countFn = this.kernelInstance!.exports[countExport] as + (() => number) | undefined; + const readFn = this.kernelInstance!.exports[readExport] as + ((index: number, bufPtr: KernelPointer, bufLen: number) => number) | undefined; + if (!setCurrentPid || !countFn || !readFn) return []; + + setCurrentPid(pid); + const count = countFn(); + const out: string[] = []; + const maxLen = Math.min(CH_DATA_SIZE, 65536); + const ptr = this.scratchOffset + CH_DATA; + const kernelMem = this.getKernelMem(); + const decoder = new TextDecoder(); + for (let i = 0; i < count; i++) { + const len = readFn(i, this.toKernelPtr(ptr), maxLen); + if (len <= 0 || len > maxLen) continue; + const copy = new Uint8Array(len); + copy.set(kernelMem.subarray(ptr, ptr + len)); + out.push(decoder.decode(copy)); + } + return out; + } + + snapshotProcessArgv(pid: number): string[] { + return this.snapshotCurrentProcessStrings(pid, "kernel_get_argc", "kernel_argv_read"); + } + + snapshotProcessEnv(pid: number): string[] { + return this.snapshotCurrentProcessStrings(pid, "kernel_environ_count", "kernel_environ_get"); + } + /** Format a syscall for logging, decoding path/string args from process memory */ private formatSyscallEntry(channel: ChannelInfo, syscallNr: number, args: number[]): string { const name = SYSCALL_NAMES[syscallNr] ?? `syscall_${syscallNr}`; @@ -5634,19 +5748,31 @@ export class CentralizedKernelWorker { } const parentPid = channel.pid; - // Skip pids that are already registered (e.g., pid 3 is nginx master) - while (this.processes.has(this.nextChildPid)) { - this.nextChildPid++; - } - const childPid = this.nextChildPid++; - - // Clone the Process in the kernel's ProcessTable + // Clone the Process in the kernel's ProcessTable. The JS host tracks live + // workers, but the kernel is the source of truth for zombies/limbo process + // records that still occupy a pid until POSIX wait semantics release them. + // If the host-side monotonic counter lands on such a pid, retry with the + // next candidate instead of surfacing EEXIST to fork() callers. const kernelForkProcess = this.kernelInstance!.exports.kernel_fork_process as (parentPid: number, childPid: number) => number; - const forkResult = kernelForkProcess(parentPid, childPid); - if (forkResult < 0) { + let childPid = 0; + let forkResult = 0; + for (let attempts = 0; attempts < 4096; attempts++) { + while (this.processes.has(this.nextChildPid)) { + this.nextChildPid++; + } + childPid = this.nextChildPid++; + forkResult = kernelForkProcess(parentPid, childPid); + if (forkResult === 0) break; + if (((-forkResult) >>> 0) !== EEXIST) break; + } + if (forkResult < 0 || childPid === 0) { // Fork failed in kernel (e.g., ESRCH, ENOMEM) - this.completeChannel(channel, SYS_FORK, _origArgs, undefined, -1, (-forkResult) >>> 0); + const errno = ((-forkResult) >>> 0) || EEXIST; + console.error( + `[kernel] kernel_fork_process failed parent=${parentPid} child=${childPid} errno=${errno}`, + ); + this.completeChannel(channel, SYS_FORK, _origArgs, undefined, -1, errno); return; } @@ -6220,13 +6346,19 @@ export class CentralizedKernelWorker { // (it loops on SYS_EXIT). if (tid > 0) { const ctidKey = `${channel.pid}:${tid}`; - const ctidPtr = this.threadCtidPtrs.get(ctidKey); - if (ctidPtr && ctidPtr !== 0) { - this.threadCtidPtrs.delete(ctidKey); + const ctidPtr = this.threadCtidPtrs.get(ctidKey) ?? 0; + this.threadCtidPtrs.delete(ctidKey); + if (ctidPtr !== 0) { const procView = new DataView(channel.memory.buffer); + const before = procView.getInt32(ctidPtr, true); procView.setInt32(ctidPtr, 0, true); const i32View = new Int32Array(channel.memory.buffer); - Atomics.notify(i32View, ctidPtr >>> 2, 1); + const woken = Atomics.notify(i32View, ctidPtr >>> 2, 1); + if (THREAD_TRACE) { + console.error(`[thread] exit pid=${channel.pid} tid=${tid} clear ctid=0x${ctidPtr.toString(16)} before=${before} woken=${woken}`); + } + } else if (THREAD_TRACE) { + console.error(`[thread] exit pid=${channel.pid} tid=${tid} missing ctid`); } } @@ -6248,19 +6380,31 @@ export class CentralizedKernelWorker { // Run the kernel's exit path so it closes all FDs (including pipe // write ends). kernel_exit calls sys_exit then traps — catch the trap. { - const kernelView = new DataView(this.kernelMemory!.buffer, this.scratchOffset); - kernelView.setUint32(CH_SYSCALL, syscallNr, true); - kernelView.setBigInt64(CH_ARGS, BigInt(exitStatus), true); - const handleChannel = this.kernelInstance!.exports.kernel_handle_channel as - (offset: KernelPointer, pid: number) => number; - this.currentHandlePid = channel.pid; - this.bindKernelTidForChannel(channel); - try { - handleChannel(this.toKernelPtr(this.scratchOffset), channel.pid); - } catch { - // Expected: kernel_exit traps with unreachable after closing FDs - } finally { - this.currentHandlePid = 0; + if (EXIT_TRACE) console.error(`[exit] pid=${channel.pid} status=${exitStatus} mark start`); + const markExited = this.kernelInstance!.exports.kernel_mark_process_exited as + ((pid: number, status: number) => number) | undefined; + if (markExited) { + const rc = markExited(channel.pid, exitStatus); + if (rc < 0) { + console.error(`[handleExit] kernel_mark_process_exited failed for pid=${channel.pid}: errno=${-rc}`); + } + if (EXIT_TRACE) console.error(`[exit] pid=${channel.pid} mark done rc=${rc}`); + } else { + const kernelView = new DataView(this.kernelMemory!.buffer, this.scratchOffset); + kernelView.setUint32(CH_SYSCALL, syscallNr, true); + kernelView.setBigInt64(CH_ARGS, BigInt(exitStatus), true); + const handleChannel = this.kernelInstance!.exports.kernel_handle_channel as + (offset: KernelPointer, pid: number) => number; + this.currentHandlePid = channel.pid; + this.bindKernelTidForChannel(channel); + try { + handleChannel(this.toKernelPtr(this.scratchOffset), channel.pid); + } catch { + // Compatibility with older kernels where kernel_exit traps after cleanup. + } finally { + this.currentHandlePid = 0; + } + if (EXIT_TRACE) console.error(`[exit] pid=${channel.pid} legacy mark done`); } } diff --git a/host/src/node-kernel-host.ts b/host/src/node-kernel-host.ts index c29214aee..16c713bab 100644 --- a/host/src/node-kernel-host.ts +++ b/host/src/node-kernel-host.ts @@ -95,6 +95,8 @@ export interface SpawnOptions { /** Initial real/effective group ID for the process. */ gid?: number; stdin?: Uint8Array; + /** Stdio fds (0, 1, 2) that should be host-backed pipes, not terminals. */ + pipeStdio?: number[]; /** Optional pre-compiled module for the supplied program bytes. */ programModule?: WebAssembly.Module; pty?: boolean; @@ -217,6 +219,7 @@ export class NodeKernelHost { ptyCols: options?.ptyCols, ptyRows: options?.ptyRows, stdin: options?.stdin, + pipeStdio: options?.pipeStdio, maxAddr: options?.maxAddr, }) as number; diff --git a/host/src/node-kernel-protocol.ts b/host/src/node-kernel-protocol.ts index 853fc19b4..d40fdfa21 100644 --- a/host/src/node-kernel-protocol.ts +++ b/host/src/node-kernel-protocol.ts @@ -63,6 +63,8 @@ export interface SpawnMessage { ptyCols?: number; ptyRows?: number; stdin?: Uint8Array; + /** Stdio fds (0, 1, 2) that should be host-backed pipes, not terminals. */ + pipeStdio?: number[]; /** Limit heap growth to protect thread channel pages */ maxAddr?: number; } diff --git a/host/src/node-kernel-worker-entry.ts b/host/src/node-kernel-worker-entry.ts index 2ecd3310d..cadcf5ab4 100644 --- a/host/src/node-kernel-worker-entry.ts +++ b/host/src/node-kernel-worker-entry.ts @@ -654,9 +654,15 @@ function handleSpawn(msg: SpawnMessage) { kernelWorker.onPtyOutput(ptyIdx, (data: Uint8Array) => { post({ type: "pty_output", pid, data }); }); - } else if (msg.stdin) { - const stdinData = msg.stdin instanceof Uint8Array ? msg.stdin : new Uint8Array(msg.stdin); - kernelWorker.setStdinData(pid, stdinData); + } else { + if (msg.pipeStdio) { + kernelWorker.setStdioPipes(pid, msg.pipeStdio); + } + if (msg.stdin) { + const stdinData = + msg.stdin instanceof Uint8Array ? msg.stdin : new Uint8Array(msg.stdin); + kernelWorker.setStdinData(pid, stdinData); + } } const initData: CentralizedWorkerInitMessage = { @@ -1070,6 +1076,7 @@ async function handleClone( if (threads) { const idx = threads.indexOf(threadEntry); if (idx >= 0) threads.splice(idx, 1); + if (threads.length === 0) threadWorkers.delete(pid); } }; const terminateThreadEntry = (): Promise => { diff --git a/host/test/multi-worker.test.ts b/host/test/multi-worker.test.ts index 1183a957f..618cb0e3d 100644 --- a/host/test/multi-worker.test.ts +++ b/host/test/multi-worker.test.ts @@ -19,6 +19,7 @@ import { CH_DATA, CH_ERRNO, CH_RETURN, + HOST_INTERCEPTED_SYSCALLS, } from "../src/generated/abi"; const MAX_PAGES = 1024; // 64 MiB: enough to prove initial < maximum. @@ -91,6 +92,24 @@ describe("CentralizedKernelWorker Process Management", () => { expect((kw as any).hostReaped.has(pid)).toBe(false); }); + it("marks selected stdio descriptors as host-backed pipes", () => { + const setStdioPipe = vi.fn(() => 0); + const kw = Object.assign(Object.create(CentralizedKernelWorker.prototype), { + kernelInstance: { + exports: { + kernel_set_stdio_pipe: setStdioPipe, + }, + }, + }); + + kw.setStdioPipes(321, [0, 1, 2, -1, 3]); + + expect(setStdioPipe).toHaveBeenCalledTimes(3); + expect(setStdioPipe).toHaveBeenNthCalledWith(1, 321, 0); + expect(setStdioPipe).toHaveBeenNthCalledWith(2, 321, 1); + expect(setStdioPipe).toHaveBeenNthCalledWith(3, 321, 2); + }); + it("lets the host terminate pthread workers without waking SYS_EXIT back into guest code", () => { const pid = 123; const mainChannelOffset = WASM_PAGE_SIZE; @@ -207,6 +226,7 @@ describe("CentralizedKernelWorker Process Management", () => { resolveClone = resolve; }); }); + const channel = { pid, channelOffset: mainChannelOffset, memory }; const kw = Object.assign(Object.create(CentralizedKernelWorker.prototype), { callbacks: { onClone }, @@ -219,8 +239,9 @@ describe("CentralizedKernelWorker Process Management", () => { scratchOffset: 0, currentHandlePid: 0, processes: new Map([ - [pid, { channels: [{ channelOffset: mainChannelOffset }] }], + [pid, { channels: [channel] }], ]), + activeChannels: [channel], threadCtidPtrs, completeChannel: vi.fn(), bindKernelTidForChannel: vi.fn(), @@ -236,7 +257,7 @@ describe("CentralizedKernelWorker Process Management", () => { }); (kw as any).handleClone( - { pid, channelOffset: mainChannelOffset, memory }, + channel, [0, stackPtr, 0, tlsPtr, ctidPtr, 0], ); @@ -354,6 +375,57 @@ describe("CentralizedKernelWorker Process Management", () => { kw.unregisterProcess(100); }); + it("retries fork pid allocation when the kernel still owns a zombie pid", async () => { + const parentPid = 77; + const memory = new WebAssembly.Memory({ + initial: 4, + maximum: 4, + shared: true, + }); + const channel = { + pid: parentPid, + channelOffset: WASM_PAGE_SIZE, + memory, + }; + const kernelForkProcess = vi.fn((_parent: number, child: number) => + child === 100 ? -17 : 0, + ); + const completeChannel = vi.fn(); + const onFork = vi.fn(() => Promise.resolve([WASM_PAGE_SIZE])); + const kw = Object.assign(Object.create(CentralizedKernelWorker.prototype), { + callbacks: { onFork }, + nextChildPid: 100, + processes: new Map([[parentPid, { channels: [channel] }]]), + threadForkContexts: new Map(), + tcpListenerTargets: new Map(), + epollInterests: new Map(), + inheritSharedMappings: vi.fn(), + completeChannel, + kernelInstance: { + exports: { + kernel_fork_process: kernelForkProcess, + kernel_clear_fork_child: vi.fn(() => 0), + kernel_reset_signal_mask: vi.fn(() => 0), + }, + }, + }); + + (kw as any).handleFork(channel, [0]); + await Promise.resolve(); + + expect(kernelForkProcess).toHaveBeenNthCalledWith(1, parentPid, 100); + expect(kernelForkProcess).toHaveBeenNthCalledWith(2, parentPid, 101); + expect(onFork).toHaveBeenCalledWith(parentPid, 101, memory, undefined); + expect(completeChannel).toHaveBeenCalledWith( + channel, + HOST_INTERCEPTED_SYSCALLS.SYS_FORK, + [0], + undefined, + 101, + 0, + ); + }); + it("should throw when registering duplicate PID", async () => { const kw = new CentralizedKernelWorker( { maxWorkers: 4, dataBufferSize: 65536, useSharedMemory: true }, diff --git a/host/test/select-timeout-retry.test.ts b/host/test/select-timeout-retry.test.ts new file mode 100644 index 000000000..4bf49a426 --- /dev/null +++ b/host/test/select-timeout-retry.test.ts @@ -0,0 +1,126 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { + CH_ERRNO, + CH_RETURN, +} from "../src/generated/abi"; +import { CentralizedKernelWorker } from "../src/kernel-worker"; + +describe("centralized select/pselect timeout retries", () => { + afterEach(() => { + vi.useRealTimers(); + }); + + it("preserves a finite pselect6 deadline across retry wakes", () => { + vi.useFakeTimers(); + vi.setSystemTime(0); + + const kernelMemory = createSharedMemory(); + const processMemory = createSharedMemory(); + const scratchOffset = 128; + const handleChannel = vi.fn(() => { + const kernelView = new DataView(kernelMemory.buffer, scratchOffset); + kernelView.setBigInt64(CH_RETURN, -1n, true); + kernelView.setUint32(CH_ERRNO, 11, true); + return 0; + }); + const worker = createWorkerHarness({ kernel_handle_channel: handleChannel }); + worker.kernelMemory = kernelMemory; + worker.scratchOffset = scratchOffset; + + const channel = createChannel(42, processMemory); + worker.processes = new Map([ + [42, { pid: 42, memory: processMemory, channels: [channel], ptrWidth: 4 }], + ]); + worker.activeChannels = [channel]; + + const readfdsPtr = 1024; + const tsPtr = 2048; + const processView = new DataView(processMemory.buffer); + processView.setUint8(readfdsPtr, 1); + processView.setBigInt64(tsPtr, 0n, true); + processView.setBigInt64(tsPtr + 8, 10_000_000n, true); + + const origArgs = [1, readfdsPtr, 0, 0, tsPtr, 0]; + worker.handlePselect6(channel, origArgs); + expect(worker.completeChannel).not.toHaveBeenCalled(); + expect(handleChannel).toHaveBeenCalledTimes(1); + + vi.advanceTimersByTime(5); + worker.wakeAllBlockedRetries(); + expect(worker.completeChannel).not.toHaveBeenCalled(); + expect(handleChannel).toHaveBeenCalledTimes(2); + + vi.advanceTimersByTime(4); + expect(worker.completeChannel).not.toHaveBeenCalled(); + + vi.advanceTimersByTime(1); + expect(worker.completeChannel).toHaveBeenCalledWith( + channel, + expect.any(Number), + origArgs, + undefined, + 0, + 0, + ); + }); +}); + +function createWorkerHarness(exports: Record): any { + return Object.assign(Object.create(CentralizedKernelWorker.prototype), { + kernelInstance: { exports }, + kernel: { + toKernelPtr(value: number | bigint): number { + return Number(value); + }, + }, + kernelMemory: createSharedMemory(), + scratchOffset: 128, + config: {}, + callbacks: {}, + processes: new Map(), + activeChannels: [], + syscallRing: new Map(), + channelTids: new Map(), + threadForkContexts: new Map(), + stdinFinite: new Set(), + stdinBuffers: new Map(), + alarmTimers: new Map(), + posixTimers: new Map(), + pendingSleeps: new Map(), + pendingPollRetries: new Map(), + pendingSelectRetries: new Map(), + pendingPipeReaders: new Map(), + pendingPipeWriters: new Map(), + socketTimeoutTimers: new Map(), + pendingCancels: new Set(), + tcpListeners: new Map(), + tcpListenerTargets: new Map(), + tcpListenerRRIndex: new Map(), + sharedMappings: new Map(), + tcpConnections: new Map(), + shmMappings: new Map(), + usePolling: false, + completeChannel: vi.fn(), + dequeueSignalForDelivery: vi.fn(), + bindKernelTidForChannel: vi.fn(), + assertKernelStackContext: vi.fn(), + }); +} + +function createSharedMemory(pages = 1): WebAssembly.Memory { + return new WebAssembly.Memory({ + initial: pages, + maximum: pages, + shared: true, + }); +} + +function createChannel(pid: number, memory: WebAssembly.Memory): any { + return { + pid, + memory, + channelOffset: 0, + i32View: new Int32Array(memory.buffer, 0), + handling: false, + }; +} From 3d27d6ed6bb72d0f184208fe261fd039474de3bd Mon Sep 17 00:00:00 2001 From: Kandelo Agent Date: Mon, 15 Jun 2026 02:33:53 +0000 Subject: [PATCH 2/8] fix: share anonymous mmap across fork --- examples/mmap_shared_anonymous_fork.c | 84 +++ examples/mmap_shared_anonymous_fork.wasm | Bin 0 -> 30840 bytes host/src/kernel-worker.ts | 781 ++++++++++++++++++++++- host/test/mmap-shared.test.ts | 60 ++ 4 files changed, 904 insertions(+), 21 deletions(-) create mode 100644 examples/mmap_shared_anonymous_fork.c create mode 100644 examples/mmap_shared_anonymous_fork.wasm diff --git a/examples/mmap_shared_anonymous_fork.c b/examples/mmap_shared_anonymous_fork.c new file mode 100644 index 000000000..b876abfa2 --- /dev/null +++ b/examples/mmap_shared_anonymous_fork.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include + +static int wait_ok(pid_t pid) { + int status = 0; + if (waitpid(pid, &status, 0) < 0) { + perror("waitpid"); + return 0; + } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + fprintf(stderr, "child failed: status=%d\n", status); + return 0; + } + return 1; +} + +int main(void) { + const long page_size = sysconf(_SC_PAGESIZE); + if (page_size <= 0) { + perror("sysconf"); + return 1; + } + + char *shared = mmap( + NULL, + (size_t)page_size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + -1, + 0 + ); + if (shared == MAP_FAILED) { + perror("mmap"); + return 1; + } + + shared[0] = 'A'; + pid_t pid = fork(); + if (pid < 0) { + perror("fork"); + return 1; + } + if (pid == 0) { + if (shared[0] != 'A') { + fprintf(stderr, "child did not see parent write: %c\n", shared[0]); + _exit(2); + } + shared[0] = 'B'; + _exit(0); + } + if (!wait_ok(pid)) return 1; + if (shared[0] != 'B') { + fprintf(stderr, "parent did not see child write: %c\n", shared[0]); + return 1; + } + printf("inherited anonymous mapping coherent\n"); + + shared[1] = 'C'; + pid = fork(); + if (pid < 0) { + perror("fork second"); + return 1; + } + if (pid == 0) { + shared[1] = 'D'; + _exit(0); + } + if (!wait_ok(pid)) return 1; + if (shared[1] != 'D') { + fprintf(stderr, "parent did not see second child write: %c\n", shared[1]); + return 1; + } + printf("reused anonymous backing coherent\n"); + + if (munmap(shared, (size_t)page_size) < 0) { + perror("munmap"); + return 1; + } + printf("PASS\n"); + return 0; +} diff --git a/examples/mmap_shared_anonymous_fork.wasm b/examples/mmap_shared_anonymous_fork.wasm new file mode 100644 index 0000000000000000000000000000000000000000..76b13720f99d967a89d20757e9a1c0ca27bf1e43 GIT binary patch literal 30840 zcmeI5eUM${eczvR&V4_3cdvv1u`A%*%L*h5fv~ZJ!45jtAS7O8LtMvw5zFcV`=Whc zA!?yrETJf^-H^s&lhqqMlVy zvK#B*OTu4_8l|bu!c1p6_LoDC&n`^4AS)co3ZLjK9U5DBV!T_?$SNp2dT60DcG#7( zE}OEN&dkw?h1r=y?&9O<}2ADiEhs3|%!UhvC4+$c14P)(1CS z7DfTZF5u4<3c;LtV%3%ZINUgN=+NTQ*!a|;x!H-C zrOrYW)D9h5nqKr!hS|g7#6RprQS58RrYD}5i3;?53XwQ8F*C8G9_~X|9pd5e#6o8r zdM(cwH4iO~JvQA5+`yqjV~G%MRzyD z*#y4H(v#xMLyOb1ON(ybm4^i1`vyKAOr7Ype&e)t^ zUGeg#=B7MRmuH>=^h1wzo|u?%yDz)2yfbsywR^1>I(@Bg>$T3x)%wGO-7$b?spIac z1UssOflH&fvT^_Z;qTgV%T4tls{PAf3GVnG4}{mQ6i-|ir04IPYL!QWq@13QCYzhN zev@m$^+&iix&AQM7S}Ixy^bsG(#k^f+U&Nv(A>i93)Eg7?hW@wdsEjeM%*Pqvy>E* zjp-VH<;8mkoSr6|8`Z4;UsBrXT&q~1uZojmx)#jqnY*p`41^5CfJu-BlQi^2MIM?( z%EHN3V5R#8TwD)|v0BH>RGMn_CU4d59f%p)-3=5@?UQ-#1fOec7^eL|-6m*O(v@ai zz~AVCRyEzlC~5UR+A>R+j^3lYl*LxmB5GFCpgEiroAsDwYldo?Otpga+WP>%(hLMq z8YICI!+hUw6z1>l6(<#~p~A|FNxaaElS+#r9g{LbF-;yC2(l&&P|OEll`)gNXJ27& zxRSy7&QOB0=5V;ex|z2CKEtKSw6;5RsWV(f6cv^xw}rx8oD2_?JQf1$!IFmgf1q9h zqhS)#48RJL)?dq3zt%wN9vmp7E*Y3=4kU59dfu^`dVz)07g)hzML3KyLJp+A_9H*@ zhwg#G-gG4yNNdwA7CM=L(Tr8@YgI?+S%d`OS)^`3RFGjK)eeA|>cBv~C_1Z60L^Hg zvqavCHGwO!jj>3xSa-3bkAv33AO!F>)(47EEnES`g>7M>6~G|FNt8AwlOPEjo6=C_ zP$nx+^72UK^=|pDyu6_DMz?(LR5ngg<(soGa%BtAXt$4FrQBg(G ztym4Rp;SED3I>9-8bj!2AfZVLqONcc;b6>da4;OHmB*~}`Mk3g85bADRT7b#?h%Dx zao617B}+0<@w`Wy8(r9?ty7oC%SXm+l|E)`R&9PL6);u=5d=;%NFu1}84$jA4^kP5 z4Uu^yn|h7K{1P*&dTdB9v0+jXPi8$$_{9X08<=1arlU7KHG)Z^lf2oQD5X^&BUZ$S zm1J--8KlDNtoufqRpzWp+Kr8nPd_qDh#*RKy(W-g{H2>p_lY^x79hm2w@*)|-~Yp> z3esFO38tjm(u#V6=3+Vmj+5fuEV$0{xzCVS7xI!KOB9DDWe`;tqFTl6k=qlUjVLOn zl}USgDKgY#xY}4BRFCv*!~o)9qn(C_GtkXUF;GPcL}IjH4Ut`2`{aCUFp0hN&W|)3 z$b{*a?24wz{`KxC(MqSZviLlUdw5=&EFGFpqkSV<{YdMw^eb*^@V1Sc1~zTnxN%eO zZ&0^U{=9KhrKtKM1=XMsZrW7aApg9_{~^R0b<92qQFH ziB*)hg&=67kaz96r)#=TFYk$5{p#-H9f7NF=|0`=Tz#bbq+zfS*`U_(syMAARa9H$ z0XQUS$RIY-w_Niw1K0p<4tcm+qX!}D<;{{bVsaTMhhunzcS>NdRZ2!*1ZO+l*&jPq zNGc0Vo{YAKJ_eyi+l4dDQEZcoHOKtji1B4<;i!#2*&Is4X1yN88ZDoz?LI0y+BbB0 zKHZfKTK0oR`Jjf%dJe6hA!J=mZ#;`hbwA6KL>dKJqiy#vYbm#Z@R`=A*bwRR0~nJv z($&ViiCjJX0TD~q@1JsGbFc>NI+$$^!8)VuRsI@}FyLtOdP9%|cs*ulRa#i_ z=Ed$ZB%41GXf4-g*ol+t4`P_Ff6*L;46Q{3q&9=Ts!dO2ePzmJXz3bDev=K*(Cwce z*a&PAf~1jb7-|BBCZk zE}a7sO^>3>r&>eSiN=_^iuDMRp}PmrgX)2D!5;Lr!e@GiR#9K8=p9IN((4ElzsSjxMrT;sYuE^v3;Jjg z(z5jQD;cI8%2n;OZZ#sXKyMePlDafboRn}3r8aWDmUY158}bI~GJAcrHH}!WK~XWb z^oCR^-BFWif_LJQM&o)Dfz<5|-wx5cqu&+2rE58TE5ryP1Q<#G6yratZ$VcpNATSeV3yYa0 zV+M~;#+L)%9Uly!Rf_jfnkjiPP1eBtUF0l7H-56mw5?VXrTA})%!Wh7zX3SASjSv= zNof`52Uz&_=q2>A$0M>^7HYm3;b)Gv?>zG&lX>BmfUmARhvp8NSEN6R9%znaj%QC( zT#bC8SNHM3GVNM?flxBJ84R2byfEt0qT5wx}r(vo3-Q$Ir}9@X^8Pb z!@xsE(DNUf6pNU$Ojkolwi+I2RYn?DB^59ZdDfv{&1t201N^J8-2Amb_@^MZ$6^(l zf_opCII$4nz<5w?uQz88!&uYF#P206yBN2Z~e7_mN3c-}PQK(pGU77wq%M9ve=IR#gBh0bJNNY)A zIASU~J<wGa+rp9es9KyaHD&UX1}-^z;UX|`7;O6 zooa5ik-o4RJWx21Y)!dAt(-ekg#U|rnmTtn8BPB*D?Syswmu`#z?5T!)S3Ukd*sEI ze9ffz>Myiz$Y(DjW~}QK%tb9ck=)R}^-OD9du`|=g}wRw0GQpr!ELhj;7IcZ4fxS> z<_}0?wO#XuwndxpUT^G=Q}da(*4BoStxp3|bDPGe>z&U%bGrR4zfJ*Q<7{g`tER`B z+u~$fd)H~&70#UM0&(YYoAraIW4t;P>kWIN3V@&WT`ogVSp(#i8o_`kL(Q7O#}x~Awaip{^+O}g3e6Zu z83+XmN#&f#)Eq@6pj6)vqPIenB^!-T9u^c`QBSU zOzTm?dvO<5WIh?3#(E3OvQZ!hW7``S#mO?UpT>-`i_#HZSd%IaiAmuQDg;|uORgGW z5Mp$NO8hHLrQve2>0XqJEt0{_mRqRB8W?&gsi{#~oyK9)V&oqJMe1S3L`+g40Eq@m z!heik0gQ&Bdq$?X{*l|)+Msyto_XO3Hpqrdt5YC0J+*o~J@vE4lWRs=S9-lL$#|=) zf62M}qPKp5X^=m0HtG>#C%riSAAYMSx97aUT|qHdo=9OIBsD z`Fr&|%nHL^ui3<(s8G0VP$?;TD&$iKG28ksLG$`<$=aP+-5cGz^W8hjK-?zc#42+H zM_Zfj(d{M-!&czA8d8mDmYuRuTk}cqCZzB@kgKomZH~r@)`c+Rm@-DytVx?r4GX}q zxXALEFu6V%Z6xBZLAa*^_ld=_mwq&|XPitmN1KY1^%6QTMf-ttO%BH{kB$>wF&ueAwYe5$Ez zkmjkfNHNkNC^Q!r?w#w(jWQbu9jG@8ZT)jOz)xj^q1W}X%Vyy-Z)Mw;(}u% zZRhS4NX4X%=+;fQ_*Mk-NcKB5L^9L=g{3H`vKM7W-Gsp(xfc($j+nZ8CMC^Rv4Cv) zds^2BD>3XY1EZXD)nps_meSD7w2swia|<8dl)JJb*Dkk;i`Z4xXcZ=E$e2+{Q6|{v zMwya65lO-3hGdXF8w?$r(W~AC-HfX03suuy3i>U2BSX8N#$qysY*#0jsY*Ugx-;D& zVE2!xWOIT~QtVp@kr$5&Ag<^xjucUH#bk2>Ym)a`m~S+&}8)$kxu%NA9Y*`erOVH{VxWYR<;R`^Fbq>czZBHNN)fyyRWMaOibf08mu zqd7d5HHVb>lf zSIJt5I}Cz8i3Ro?pWKQkr6Av#F^r!;ao-SVX*%)${vK(hy6Q3qlNcfM2rRWMeJ_&K z(~XnqMv){R2xMPv`l`!+7D`_v`tQCfLeogSn5r+tsH4l$;U#o?TAjxNieTw5m<;!( z_#zTONF+^|IUMJo#VwK_e}kdAnvTGv7u7*1VnMc56bN}wMIDJGrQo3g?#Np~lgMg& z^kzWd;&}`LrjU~nahTJ~5J(&7jY|zgh)gm4-yrX^J^FgEejYf&SHEu6U%ROK6<>Yc zs$aXP`mC>h#j0Pus9IY_%zDWw)y9{j z5f(Gcqd`p)euvxxZXE zveI5E!jamSFtQ(R~+oR1n{9cJr;(T7Lk|)2KHGnnm zk+hba7Q9SpR|4F-m9Q+PH{I^Y3Ta0b2qim~G()mr9P^;j&v=1D?0&Y2Q}^N_5u@V! zWW%-amheWk&4JrgRg#^MAy10pR^t57c;G=U0C35)XjN`y^<8l*>%<-1>}Dl*E7!pz zw^9c}dT#E!2#?VKM0H_;5SrWd+-|upU61=aon!4Y;2UstyI_EtZqE1Hk?K1QPnPqw zodk9(iArvFKOFTrX>1R~NecnPSwo-V)Ick?SCe@Eh-F$#A=n9`0sN5-#N7n_Y)ugD z)XZR$m9Y3mkt#KQbxO!442Bh{3ETQO&}xVg8<${2HK&zFmo-+uOXlZkn$fqLw`V|b zUGHHVh^gDL=TJp}C1&doXm#F|hsE;h1{ReS>bZ-)S-!!!Je zAj-UZa$xN(Kc10;v`dI?6gl3J+18Uojq$`UCPy|h>PHi!xMKwZUsdvklt+`jTEy|r z$I;4wP#C(AO)IyF$5qB@MKp4ka8^#aYLDTk7*)b@k=N(ZG z>no*uC(oU3*8E0*z)<12({ZcN4xUM(lZaE{;JIg#@T8E{EcSXQ;uLzH5%}h_g$#>^ zU|%C*j8+Y}3$&`8iGjb66wjRk0^(2lvGeO}sApG5<1ud?uYjuXM5-ZL@P>ylWetpY z0fgl)gmD+bx`(jJuvu?n9_!tMkeCTC6#(JB9KgB(SO>uBxl=LxR&17qQ!xS<1oOv5 zzT+)+>mNCHylICg#AZqT6TV(-MT<`;wno#0;4O(icJ3Kg59Oa}hT6rVxwck>VMhfm zK0cuHR+L-&N31=?v`>bb?1op8a1X8qr^@!=Y|xRm1@VpsFs7_<(1@J!W`Ej98n%LS z!d?q;_Dk1)N?*L zYf$hFDi<}#XE0YKXcsPO;OAiS&{wtH_e_fX=_;VekqS6AGMv8Q#8LlGLmlHkSe6F< zPH#n)k}xHUid%866`VPRZdpl!)9sb#PC^C=0CuK4+2jmi62O-x4#l)C9TjHM!6V&a z;$TXR4`5AwUMOn-`6Kubjg*OYK8lG9Oj7hh?TaOMEmVgIr#2wHP)dK8$)AGT(W4)d zrio0`6u34E%n-e(K${Goy#zLA>j_Q%(l6ppC zrgs|lwOmb%-U7ph-?IFCFdwQgGbUf=utMB5EQq7PIR$PctC<$Yhe&GLxnxBO2r5K9 zqe1+IdKj%#Pk=8Ah;-_q%4)k0$ZZlmmt#cn5A#M{qa@UV+&(MiZ3`&jG-IpmjS?*i z!qhz!7r=Yftd}J#Baq5t))Fe#)}b*3b7hYd4Is|Sdiaq!u7bc zIKcr$ZF;bQE@Q&L7CUsZ1%*xkjbA*rOv0L+k;*yI8ps+QZw;bju!+!;L%3=1zzBEj zD(KFl72zX1VV}ZGHD3-Ewwe@1TA{`qN-BNjWbsE@)f|k0!4_6-0SH>NbpD9yr@ms|u@ zx)E~=@^kHZ3XaucOZ9S`*@d#LB?rS63b9TT$Mh}a|KdXb0~s8Y3HCevHOJaOL!5pZ zA?U6XRyr-A=g5ROg+H<@th>8vtym{(5o!7{4Cw;ujpQ{hFtvF4`U(OQ(d z)&g*3Ey}VMxxbxR3w(KCH*29YJ-=rIcKNg!qTO?wae=xrO@|YO(I|KmSxA3iW&c1Z zRU@;JSwyS_NI%o$fNV3&EQVOmEC%i}*u!F&V6(E!n=Qy{2y;E$-FfmIH-{FC^d!!}};v z6^(8vyCGA@JRD?y5jCv09aN9nBur;Bh)~gXZ0)pnTHD8%=vLdMnzXG4IhY!-sACz~ zsBE0ryBg^8$)-bPklnT^Hwkv$4hr{%dlOzzvJJ+Dvv&|DZ=@vcjUv`eHZD%y(b4X$x% z73rc-f#q_DTMgI@(;x6f)EYWH6(FH*( zFri${VA);}X3HVGsn7SF+v^7ST#oL$Ho903AN1twhKm&nn(A zWPE$58-lu#gSHN8sw%yzbx^PGwGQh3UhARWa1qp^m=t@4e+jcejl6w(s5cAh$|a$u z3R>h<>!4oWYaP`6z1Bm$@gk^!4(A>!WGuYXW!zKP=7QbujM&;0_c#cU6mE6{cenxF z?Q}!8yCD-9V^m2VjLH^M#>v~A(|+oOvOqYJ%Yt*et$}nwPZpr|x0eMFtCLbfG*Ady z=}PHh^y_YYNT{_# z$>5{gn^w08?aI?;bPAU(K*=S+esCGZl5*JwiMiYeJ6E?yyPDX1FOk`tyhy`)zhH(l zME0i7x^iA%iAkQl?xtE10m>zMqQZ3gNdMOLn|A8p4LUB5l0H8HG3^FNf&l4 zUZPQHT_c6wm_C=FmL{7Hj`#1eu{>Nnorp+3BM+s*qCU{rGb}B=dDvVy+*{a2A6q8R zr7V40_(7r^0D55L+=+JM3_**JbjqI?c@YFgAhp4plwW+~dD^;`(?>nfA7T!fZuY5VIm)u=Oh{mM^^Z{xyg(;KyBH0w3tQL*@idxe8%(?c18@OG9cn;G?rJ$`4c}u1c87QAii<~| z4Tu$ph*Ng3+pn^>PQ=l!G(I?N1B7Ytt^K?V$(7-;#y)t(oJRrS= zZ3+ZJU2MzZgutkAGvhu_53OHwO7??ECRU;Jl@?<1A+5?9Bt@+VjR$7%q;xJeS2G07u_ZCY#1p^zoS5?~9ML_x2hQ<50Uhak8wM#GftHm5c!x%CBi;^#iC#)!>TGgv2x zRBLg4U~s6hfz3(t1RbFJ*V&CgqM%+Ry}0gHqPxh|70a8X_V?qh64eb|sxIp{c&WOq z9~(h_%#-dl{%fHV3)7FZlkLjJ57Y-D@^#3<}chO_$mf=P{gM9D~<_V?b zX?R=g&NT}(fT9nQ%?yiXI~0b6FLMssH9nYrBC{SG4S@A1(W|u?*pqR0RGIi=3r6M8 ztaALj1_t!XLKnm{ zIAekb-$o$%G#^7`AeIe8)<&u;T#|ca&_V@0vx-k^a+M2P&vzeeQP38oy(UitFH6|{ z68G)5Ajo;8L$3?dv%mYiRlKA|B82$CrN62;z>Szcll~NS@>lSZXbrdkVry2B2FOS0 z8}DU}C+{acuLiiGQM9c{e-gJpy$MV`div;z0To?@0x5&A>wmwV{^;j zCFF7a9U0xo-;wJj!LN3t)B=A;wonk+IKj8B1os4{-*TZFju<=*+AH4*%a8 z1cUgH+>~AVKW0OJcl6XVOj3AFO0TKIRtbwy$$n)z;;$;6CR}PzlS5D6a zlP}B3Lwj5)Zx$w=(m6ifaz&T$ws4?8vNROC_@yX_R?q|KJBwtz~Yq|%D41tu|?dO1Zykfq~&v8>f?e4mptBH`Zj zoB4zaerR?7-*i4@h>+e3=F=2!e%|s7_SI7F(iC6&+`Bsk#{l2&;=c&Pkgysy^KL>S zJ&>SXrLf7&*&|;Pp%fd)(6)nJr2V3-ngo8gp>&{c!9E>(L-_!FSqJ6DGujA}JwOjA z3(2>eRHj=5Rd_()x-gX!%5m+3i--AnX>q~D5$Pv$zPws-o?Rqk{mD)BBij=K|Y1e zMJa49n@Qnt*@8RAB_UhDWmH;0DQsSQR|?ytBHY`{Cl?1v`jY=>6tU>&6D3vlV&z7I zMrjx-ttK4sO&Voj)uF`(qGva@Tva!!vXVN_2z44Yd5%z1mQhXvX~{SnhHurX4|7f6SJywr`tF*XI}A+hu)VNjF-(w-Iii!%_6%=1}} z(Gt0`G73JYOc`H?;LQ+jMDKRBd#lQ~=%DMHS;rSbw1u=tl)k{h3`@81`DQTL7?huZ zP?&<+Usy@migAt6_-^(bteA5tybTPLqFo5Cw}rd#!64AsTVSH$s2d zpB2h2Q%QsU6gCAwx3;$_x-=wB0|>Ba5WcxULQZCA1is48#9sH92FL*A z$(R2I1Kt{bkgP~MY~0bM>F{{EQLBV{o_94d%N0{Lz-1G}7`l=m{%J-w-&>~-5?5I9DSLe_ zqubI&yGdsMx8w#{Fx>5>Ts&HMC>tuhljZ^6fd(v; zmyV|A_2g2r!`526f$@y zlL$4L$8BgI6gP_k0^&2GH8ei4Tq&|lAG*%RYTf9q#3H)Un|dXR#8J44+%KBPqIHYn zGG>au7zSE+`q|vK$UGzRuo>@c2K@^4qj?UOn)^xc#rH-9=?lIO^QFH-{Kw>&hp+j& zybud#xofQnz8Q+s*OU?s8aHIOAk;<&0~{%}TJr+EezTcKs9!u&aM4UMHt?bSv_|lxNLO ze8Dr%*|!O%>~JuZ`<@Zh0C?n^SWyIc2nX~5+{3U-@_ij;Bq5rR}MkHjN*F;cPm zE+ZWaD8#=S;Gui0LtB5aDA?)x^o*JF{u&J?=3dv_3emwA7vkcnP*w*iox%nMsm?Wt zO?5s76{0V?!_6b+7dPQfMhkpHSMS^I#M{=K!wl$pz>~ND7=uZ~7?~IZ`pAO!p9R>& zbQpOo3(J=yMQ=G$^p+!q%`rzx{H=dk-z3QVqI9LP{eo?MRcTj0Zoo@;Ot!fhw0N-Z zB9*;G2|2)FPP7o&hsO4#>&7U9CiRP8p!JzwR$AwhF{#-OLqhVKCdMuKP_9d~#l@BY-eEMhPQ;LhZgOcO#U+DuTX%8?I8Ys`7KP^CADbqRhgf zig`V#p1gRWWn(2I$)q1}L9hc!@Nx@1s+^b?c4^eaO4yguoY_>yU{U-+5RiIyFN=ZC z$#mE{v}r#oVBAi(ZNLc>ZnA>yDz|M(^itz8wpUite+(_;8XBdpYz6}V;*qpv?#HyP zv7K(a2^dCxTY7RunPpiN4ap|_sUQf%BYNQr*A*C;L9b4dLgG72xKW~wGCU|6!5XXK zfmW!u+Co?*+AEk{_LU>{nq~G%ni{IlC0^-MO!q6hjgBN2iV13r^38~0`nd+$PDd|ucI_oip^ojIGsv_{Utl3IKa z(j5LIpdKp9+C%MdgX8Tyb(Y>h98h1D=tw_JS>tALyuZDC-4xK4rrALo#-P=@LHOQn zv3LBB#6s!IKAYh&=8EYXEXD$nK-dxT8%Q zk7ZPDgWYsANGpC@3n$*g5%K;cqQKFo$bss z2(eT92VX+Rlb7IVmghuHO&l};3DvhWS%42aT>SU&pe->Z0AW~vcEEtudL*{|mAlR? z1<#_YkBRcATFwE47kE`km~a2bkJ#Wqiqda7X`=4H>s+-={b#P)Uoq(2hG0%#H6tI- zlh8>2Crf%6&nNj_UhhTe00 zS%%)^!;e74`b}T%Y#T2Na<{q6+CA#SWH`a4C+$s}rJIBEKLm6B@@aNO;#-)lca%d) z*`Dl-16=|RjC2orAhC$amLNEhou}&`ck&Ljdu$bQvXmFp49ZHqaQ? zm9Il_RJbV2j)w)uvyVO`eJPl{w*nW(@n)BXzN?Z^B!YFRkA`?WEDqN(w_RBQ)Xa=o zx4_%C#+7{qAdYmg6^w##MABq8hDmsYBXycCu6cB&TPEW&vnerIrN|QZafx1|rTkKG z>0r!9pby8#b$o*m;Q!gxQBZLF1+R?k+>s>7u8;3J=63G-_>Ns4=YHp|V>@;oODLL{ zd9t%GvD7)7jLpo>Jbh$#c`-RMHa9mh^F%T}OI?Q_(pl&%FRp9x*x2~gx)w*4XXxdQ z=woJV#@P@0EIz$BK0EU`b3ASZ$zq4+!|tiEiKV%T!|q`E@WbxG^g(yv!F}!~w{2#5 zdV0IFU*Jg|ADfu&9KJo#kCH4eerW6AT2^s*;&3uEyTn+XWNu90B~NJqw*{I3K%CDR9n|HWs1RKyk1c7hp*L!iNb@fyS`rrDht?qQ~jkwL#{K&qnTccL* z_MgXR|M~=qho`EyAempX9Z{BmbzyyNbg zJu)^iGR==fbrRrD4v#G{`O|ZPBUzjq8}B63oyV6FgW>1|t9WRcr^gfjOHW$H z?D7Jw9$Q{~+8xkOE-ft2NlzfvOPwRh_}KJxuN5V8vkPMj6Vp#8%QIs~kzQN$ftjQH z3=^F?ohkR>h1tc$orgP`E}5Q~nWF#L;hnQHbg~utq1|2f(@d<=u))aS?8CGCCYAkC zQzD!$L4sti(^=?4-O|Jnz?)rOTHp9_=`Oc_c5x}0SWFJjJ~h*QSf1gxn#P|rNK<~5 z2}DBfgbuE~Fgw+mN#-WN*nyiL%7~lOBU=M_9rN8>dwOy zvGG}ctjkDvZ~^qofI;Jqgk_y%jgKL#*c7)f`^hisKXGJkx+7eAY8`_nv8HYZQEx1RU?wn8YOL|Y(@=b$eqjjro;a!ecSG>49hc*yOA2tyDAnMSo z5s1B~ZuiH>j!1{BZ=1{Jr7BGkWN31E!}YK|*jajtpXKX{fBxJr{+H4(GrnKHIvIP+ zuM0vyw*2_xu!Gm7J%VQ|M%Cc!NM-e~AgFqUALdu~gadwsEg5?p{hD|TCyP%mFPWO^ zS%tA_{ixj2BJ$kA>=RN{eTuKY6v@C4nCS8|euuF;ABA5LmYx#($nq?V-q zECszev3vw7FXNs49V2^IWESHA4BEt#vvbK9MhR_9r;ld*lT39yv3dx&j}kF*=h&R- z!2H=58wo|P9(F3^NAuvrNkfsSxY$`RL*(2M_ni9)N5mgq9{%(2Q{mOoEz#EC&)na2 z?daxk&3(Z=9Nf)m>%-w^!`~0r!jHJc@WJT6xP3t<`1`nBJKSw#`?y9S<(tXjluj&%}d*`llwHozN zt?Yu4Dv^t;mZ*$fNqw@Od1GA%^9M#AQ2hW-uB)oJs>*JC-mZJCcki6LO!xLt^~HSl z*Y`E$w$b-E#C<^@uIh{V?62=@%I(I!M{+mL*+J539Zr3gC zcZ{!8seM#^F`xbQ-A;CMMc<<_h<#LjF`xXm>z4Pv@7DM4-gDar()Ne%x_e*GA6{E| zn}^_w;n+A|VMhO7K23kw|LJ|sk{KUb_jiJ;My=pF zBe?nmM?pdVpWss4zs=vHgNVux1_gOI!9;O)?6V-Jl%7JzFE5sM-SpnwH{Ig?buh%QgW3DkA|rWbcyM(C~|BURMC^KqOtej;^8{MhpJ)K2ft@2D*DTc|tnFL!K|;-6DA zv(pR&U3LsDbTDO#Wp+y8FYc%vk;=z)U+(M}>KkY0?A+3hE9Cm`%zr_3r`)e;Jca)a D4Nm!D literal 0 HcmV?d00001 diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index f86729b4d..47fd3c264 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -457,6 +457,43 @@ interface ProcessRegistration { * max_addr as channels are added; dynamic pthread control slots must not. */ explicitMaxAddr: boolean; + /** True when the guest glue traps instead of returning on CH_ERROR. */ + channelErrorTraps?: boolean; +} + +interface SharedMmapFdStat { + key: string; + size: number; +} + +interface SharedMmapBacking { + key: string; + path: string; + handle: number; + anonymous: boolean; + writable: boolean; + pages: Map; + dirtyPages: Set; + refCount: number; + version: number; +} + +interface SharedMmapMapping { + fd: number; + fileOffset: number; + len: number; + writable: boolean; + backingKey: string; + snapshot: Uint8Array; + version: number; +} + +interface SysvShmMapping { + segId: number; + size: number; + readOnly: boolean; + snapshot: Uint8Array; + version: number; } interface RegisterProcessOptions { @@ -790,12 +827,14 @@ export class CentralizedKernelWorker { recvPipeIdx: number; schedulePump: () => void; }>>(); - /** Per-process MAP_SHARED file-backed mappings: pid → Map */ - private sharedMappings = new Map>(); + /** Per-process MAP_SHARED mappings: pid → Map */ + private sharedMappings = new Map>(); + /** Host page-cache entries backing MAP_SHARED mappings. */ + private sharedMmapBackings = new Map(); + /** Cached process-fd to shared-mmap backing resolution. Negative entries avoid per-read fstat/path probes. */ + private sharedMmapFdCache = new Map(); + /** Monotonic id for anonymous MAP_SHARED backings. */ + private nextAnonymousMmapBackingId = 1; /** Host-side mirror of epoll interest lists: "pid:epfd" → interests. * Maintained by intercepting epoll_ctl results. Used by handleEpollPwait * to convert epoll_pwait to poll without calling kernel_handle_channel @@ -2106,6 +2145,10 @@ export class CentralizedKernelWorker { logEntry = this.formatSyscallEntry(channel, syscallNr, origArgs); } + this.synchronizeSharedMappingsForSyscallBoundary(channel, syscallNr); + this.synchronizeSysvShmMappingsForSyscallBoundary(channel); + this.flushSharedMappingsBeforeFileSyscall(channel, syscallNr, origArgs); + // --- Intercept fork/exec/clone/exit before calling kernel --- // These syscalls need special async handling that can't go through // the blocking host_fork/host_exec imports. @@ -2519,26 +2562,20 @@ export class CentralizedKernelWorker { console.error(`[BRK ALERT] pid=${channel.pid} brk returned 0x${(retVal >>> 0).toString(16)} — IN THREAD REGION!`); } - // --- File-backed mmap: populate mapped region with file data --- + // --- mmap backing: populate file mappings and track MAP_SHARED mappings --- if (syscallNr === SYS_MMAP && retVal > 0 && (retVal >>> 0) !== 0xffffffff) { const mmapFd = origArgs[4]; const mmapFlags = origArgs[3] >>> 0; - if (mmapFd >= 0 && (mmapFlags & MAP_ANONYMOUS) === 0) { - this.populateMmapFromFile(channel, retVal >>> 0, origArgs); - // Track MAP_SHARED file-backed mappings for msync writeback - if (mmapFlags & MAP_SHARED) { - const pageOffset = origArgs[5] >>> 0; - let pidMap = this.sharedMappings.get(channel.pid); - if (!pidMap) { - pidMap = new Map(); - this.sharedMappings.set(channel.pid, pidMap); + if (mmapFlags & MAP_SHARED) { + if (mmapFlags & MAP_ANONYMOUS) { + this.mapSharedAnonymousMmap(channel, retVal >>> 0, origArgs); + } else if (mmapFd >= 0) { + if (!this.mapSharedMmapFromFile(channel, retVal >>> 0, origArgs)) { + this.populateMmapFromFile(channel, retVal >>> 0, origArgs); } - pidMap.set(retVal >>> 0, { - fd: mmapFd, - fileOffset: pageOffset * 4096, - len: origArgs[1] >>> 0, - }); } + } else if (mmapFd >= 0 && (mmapFlags & MAP_ANONYMOUS) === 0) { + this.populateMmapFromFile(channel, retVal >>> 0, origArgs); } // DRI bo mmap prime: the kernel's sys_mmap on /dev/dri/{render,card} // already called `host_gbm_bo_bind` to record metadata, but the @@ -2552,6 +2589,7 @@ export class CentralizedKernelWorker { this.kernel.bos.primeBindFromSab(channel.pid, boId, channel.memory); } } + this.assertKernelStackStage("after mmap backing population", kernelStackTrace, channel, syscallNr, origArgs); // --- msync: flush MAP_SHARED regions back to file --- if (syscallNr === SYS_MSYNC && retVal === 0) { @@ -2760,6 +2798,12 @@ export class CentralizedKernelWorker { } } + if (options.syncSharedMappings !== false) { + const includeAnonymous = this.syscallSynchronizesAnonymousSharedMemory(syscallNr); + this.syncSharedMappingsFromProcess(channel, includeAnonymous); + this.refreshSharedMappingsToProcess(channel, includeAnonymous); + } + // Clear handling flag (channel is done — poller can pick it up for next syscall) channel.handling = false; @@ -7172,6 +7216,407 @@ export class CentralizedKernelWorker { } } + private mapSharedMmapFromFile( + channel: ChannelInfo, + mmapAddr: number, + origArgs: number[], + ): boolean { + const fd = origArgs[4]; + const mapLen = origArgs[1] >>> 0; + const pageOffset = origArgs[5] >>> 0; + const fileOffset = pageOffset * FILE_PAGE_SIZE; + const writable = (origArgs[2] & PROT_WRITE) !== 0; + if (mapLen === 0) return true; + + const path = this.getFdPathForSharedMapping(channel, fd); + const stat = this.getFdStatForSharedMapping(channel, fd); + if (!path || !stat) return false; + + const key = stat.key || `path:${path}`; + const backing = this.getOrCreateSharedMmapBacking(key, path, writable); + if (!backing) return false; + + try { + this.ensureBackingRangeLoaded(backing, fileOffset, mapLen); + } catch { + return false; + } + + const processMem = new Uint8Array(channel.memory.buffer); + if (mmapAddr + mapLen > processMem.length) return false; + + const initial = this.readBackingRange(backing, fileOffset, mapLen); + processMem.set(initial, mmapAddr); + + let pidMap = this.sharedMappings.get(channel.pid); + if (!pidMap) { + pidMap = new Map(); + this.sharedMappings.set(channel.pid, pidMap); + } + backing.refCount++; + pidMap.set(mmapAddr, { + fd, + fileOffset, + len: mapLen, + writable, + backingKey: key, + snapshot: initial.slice(), + version: backing.version, + }); + return true; + } + + private mapSharedAnonymousMmap( + channel: ChannelInfo, + mmapAddr: number, + origArgs: number[], + ): boolean { + const mapLen = origArgs[1] >>> 0; + const writable = (origArgs[2] & PROT_WRITE) !== 0; + if (mapLen === 0) return true; + + const processMem = new Uint8Array(channel.memory.buffer); + if (mmapAddr + mapLen > processMem.length) return false; + + const key = `anon:${channel.pid}:${mmapAddr}:${this.nextAnonymousMmapBackingId++}`; + const backing: SharedMmapBacking = { + key, + path: "", + handle: -1, + anonymous: true, + writable, + pages: new Map(), + dirtyPages: new Set(), + refCount: 0, + version: 0, + }; + this.sharedMmapBackings.set(key, backing); + + const initial = processMem.slice(mmapAddr, mmapAddr + mapLen); + this.copyRangeToBacking(backing, 0, initial, false); + + let pidMap = this.sharedMappings.get(channel.pid); + if (!pidMap) { + pidMap = new Map(); + this.sharedMappings.set(channel.pid, pidMap); + } + backing.refCount++; + pidMap.set(mmapAddr, { + fd: -1, + fileOffset: 0, + len: mapLen, + writable, + backingKey: key, + snapshot: initial, + version: backing.version, + }); + return true; + } + + private getFdStatForSharedMapping(channel: ChannelInfo, fd: number): SharedMmapFdStat | null { + const handleChannel = this.kernelInstance!.exports.kernel_handle_channel as + (offset: KernelPointer, pid: number) => number; + const kernelView = new DataView(this.kernelMemory!.buffer, this.scratchOffset); + const statPtr = this.scratchOffset + CH_DATA; + + kernelView.setUint32(CH_SYSCALL, SYS_FSTAT, true); + kernelView.setBigInt64(CH_ARGS + 0 * CH_ARG_SIZE, BigInt(fd), true); + kernelView.setBigInt64(CH_ARGS + 1 * CH_ARG_SIZE, BigInt(statPtr), true); + for (let i = 2; i < CH_ARGS_COUNT; i++) { + kernelView.setBigInt64(CH_ARGS + i * CH_ARG_SIZE, BigInt(0), true); + } + + this.currentHandlePid = channel.pid; + this.bindKernelTidForChannel(channel); + try { + handleChannel(this.toKernelPtr(this.scratchOffset), channel.pid); + } catch { + return null; + } finally { + this.currentHandlePid = 0; + } + + const retVal = Number(kernelView.getBigInt64(CH_RETURN, true)); + const errVal = kernelView.getUint32(CH_ERRNO, true); + if (retVal !== 0 || errVal !== 0) return null; + + const statView = new DataView(this.kernelMemory!.buffer, statPtr); + const dev = statView.getBigUint64(0, true); + const ino = statView.getBigUint64(8, true); + const size64 = statView.getBigUint64(32, true); + const size = size64 > BigInt(Number.MAX_SAFE_INTEGER) + ? Number.MAX_SAFE_INTEGER + : Number(size64); + const key = dev !== 0n || ino !== 0n ? `${dev.toString()}:${ino.toString()}` : ""; + return { key, size }; + } + + private getFdPathForSharedMapping(channel: ChannelInfo, fd: number): string | null { + const getFdPath = this.kernelInstance!.exports.kernel_get_fd_path as + ((pid: number, fd: number, bufPtr: KernelPointer, bufLen: number) => number) | undefined; + if (!getFdPath) return null; + + const bufPtr = this.scratchOffset + CH_DATA; + const maxLen = Math.min(4096, CH_DATA_SIZE); + const result = getFdPath(channel.pid, fd, this.toKernelPtr(bufPtr), maxLen); + if (result <= 0) return null; + + const kernelBuf = new Uint8Array(this.kernelMemory!.buffer); + return new TextDecoder().decode(kernelBuf.slice(bufPtr, bufPtr + result)); + } + + private getOrCreateSharedMmapBacking( + key: string, + path: string, + writable: boolean, + ): SharedMmapBacking | null { + const existing = this.sharedMmapBackings.get(key); + if (existing) { + if (writable && !existing.writable) { + const upgraded = this.openSharedMmapBackingHandle(path, true); + if (upgraded === null) return null; + try { + this.io.close(existing.handle); + } catch { + // Keep going: the replacement handle is already open. + } + existing.handle = upgraded; + existing.writable = true; + } + return existing; + } + + const handle = this.openSharedMmapBackingHandle(path, writable); + if (handle === null) return null; + const backing: SharedMmapBacking = { + key, + path, + handle, + anonymous: false, + writable, + pages: new Map(), + dirtyPages: new Set(), + refCount: 0, + version: 0, + }; + this.sharedMmapBackings.set(key, backing); + this.invalidateSharedMmapFdCache(); + return backing; + } + + private openSharedMmapBackingHandle(path: string, writable: boolean): number | null { + try { + return this.io.open(path, writable ? O_RDWR : O_RDONLY, 0); + } catch { + return null; + } + } + + private ensureBackingRangeLoaded(backing: SharedMmapBacking, offset: number, len: number): void { + if (len <= 0) return; + const firstPage = Math.floor(offset / FILE_PAGE_SIZE); + const lastPage = Math.floor((offset + len - 1) / FILE_PAGE_SIZE); + for (let page = firstPage; page <= lastPage; page++) { + this.ensureBackingPageLoaded(backing, page); + } + } + + private ensureBackingPageLoaded(backing: SharedMmapBacking, page: number): Uint8Array { + const existing = backing.pages.get(page); + if (existing) return existing; + const data = this.readBackingPageFromFile(backing, page); + backing.pages.set(page, data); + return data; + } + + private readBackingPageFromFile(backing: SharedMmapBacking, page: number): Uint8Array { + const data = new Uint8Array(FILE_PAGE_SIZE); + if (backing.handle < 0) { + return data; + } + try { + const bytesRead = this.io.read( + backing.handle, + data, + page * FILE_PAGE_SIZE, + FILE_PAGE_SIZE, + ); + if (bytesRead > 0 && bytesRead < FILE_PAGE_SIZE) { + data.fill(0, bytesRead); + } + } catch { + // Sparse EOF or a transient host read error leaves the page zero-filled. + } + return data; + } + + private readBackingRange(backing: SharedMmapBacking, offset: number, len: number): Uint8Array { + const out = new Uint8Array(len); + let copied = 0; + while (copied < len) { + const absolute = offset + copied; + const page = Math.floor(absolute / FILE_PAGE_SIZE); + const pageOffset = absolute % FILE_PAGE_SIZE; + const n = Math.min(FILE_PAGE_SIZE - pageOffset, len - copied); + const pageData = this.ensureBackingPageLoaded(backing, page); + out.set(pageData.subarray(pageOffset, pageOffset + n), copied); + copied += n; + } + return out; + } + + private copyRangeToBacking( + backing: SharedMmapBacking, + offset: number, + bytes: Uint8Array, + markDirty: boolean, + ): void { + let copied = 0; + while (copied < bytes.length) { + const absolute = offset + copied; + const page = Math.floor(absolute / FILE_PAGE_SIZE); + const pageOffset = absolute % FILE_PAGE_SIZE; + const n = Math.min(FILE_PAGE_SIZE - pageOffset, bytes.length - copied); + const pageData = this.ensureBackingPageLoaded(backing, page); + pageData.set(bytes.subarray(copied, copied + n), pageOffset); + if (markDirty) { + backing.dirtyPages.add(page); + } else { + backing.dirtyPages.delete(page); + } + copied += n; + } + } + + private rangeDiffersFromSnapshot( + processMem: Uint8Array, + memOffset: number, + snapshot: Uint8Array, + snapshotOffset: number, + len: number, + ): boolean { + const BufferCtor = (globalThis as { Buffer?: typeof Buffer }).Buffer; + if (BufferCtor?.compare && BufferCtor?.from) { + try { + const processView = BufferCtor.from( + processMem.buffer, + processMem.byteOffset + memOffset, + len, + ); + const snapshotView = BufferCtor.from( + snapshot.buffer, + snapshot.byteOffset + snapshotOffset, + len, + ); + return BufferCtor.compare(processView, snapshotView) !== 0; + } catch { + // Browser builds do not provide Buffer. Fall through to typed arrays. + } + } + + const processByteOffset = processMem.byteOffset + memOffset; + const snapshotByteOffset = snapshot.byteOffset + snapshotOffset; + if (((processByteOffset | snapshotByteOffset | len) & 3) === 0) { + const processWords = new Uint32Array(processMem.buffer, processByteOffset, len / 4); + const snapshotWords = new Uint32Array(snapshot.buffer, snapshotByteOffset, len / 4); + for (let i = 0; i < processWords.length; i++) { + if (processWords[i] !== snapshotWords[i]) { + return true; + } + } + return false; + } + + for (let i = 0; i < len; i++) { + if (processMem[memOffset + i] !== snapshot[snapshotOffset + i]) { + return true; + } + } + return false; + } + + private synchronizeSharedMappingsForSyscallBoundary(channel: ChannelInfo, syscallNr: number): void { + const includeAnonymous = this.syscallSynchronizesAnonymousSharedMemory(syscallNr); + this.syncSharedMappingsFromProcess(channel, includeAnonymous); + this.refreshSharedMappingsToProcess(channel, includeAnonymous); + } + + private syscallSynchronizesAnonymousSharedMemory(syscallNr: number): boolean { + return syscallNr === SYS_FORK + || syscallNr === SYS_VFORK + || syscallNr === SYS_CLONE + || syscallNr === SYS_EXIT + || syscallNr === SYS_EXIT_GROUP + || syscallNr === SYS_WAIT4 + || syscallNr === SYS_WAITID + || syscallNr === SYS_FUTEX + || syscallNr === SYS_POLL + || syscallNr === SYS_PPOLL + || syscallNr === SYS_SELECT + || syscallNr === SYS_PSELECT6 + || syscallNr === SYS_EPOLL_WAIT + || syscallNr === SYS_EPOLL_PWAIT + || syscallNr === SYS_RT_SIGTIMEDWAIT + || syscallNr === SYS_MSYNC + || syscallNr === SYS_MUNMAP + || syscallNr === SYS_MREMAP; + } + + private syncSharedMappingsFromProcess(channel: ChannelInfo, includeAnonymous = true): void { + const pidMap = this.sharedMappings.get(channel.pid); + if (!pidMap || pidMap.size === 0) return; + const processMem = new Uint8Array(channel.memory.buffer); + + for (const [mapAddr, mapping] of pidMap) { + if (!mapping.writable) continue; + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (!backing) continue; + if (backing.anonymous && !includeAnonymous) continue; + if (mapAddr + mapping.len > processMem.length) continue; + + let changed = false; + for (let offset = 0; offset < mapping.len; offset += FILE_PAGE_SIZE) { + const n = Math.min(FILE_PAGE_SIZE, mapping.len - offset); + if (!this.rangeDiffersFromSnapshot( + processMem, + mapAddr + offset, + mapping.snapshot, + offset, + n, + )) { + continue; + } + const bytes = processMem.subarray(mapAddr + offset, mapAddr + offset + n); + this.copyRangeToBacking(backing, mapping.fileOffset + offset, bytes, true); + mapping.snapshot.set(bytes, offset); + changed = true; + } + + if (changed) { + backing.version++; + mapping.version = backing.version; + } + } + } + + private refreshSharedMappingsToProcess(channel: ChannelInfo, includeAnonymous = true): void { + const pidMap = this.sharedMappings.get(channel.pid); + if (!pidMap || pidMap.size === 0) return; + const processMem = new Uint8Array(channel.memory.buffer); + + for (const [mapAddr, mapping] of pidMap) { + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (!backing || mapping.version === backing.version) continue; + if (backing.anonymous && !includeAnonymous) continue; + if (mapAddr + mapping.len > processMem.length) continue; + + const latest = this.readBackingRange(backing, mapping.fileOffset, mapping.len); + processMem.set(latest, mapAddr); + mapping.snapshot = latest.slice(); + mapping.version = backing.version; + } + } + /** * Populate a file-backed mmap region by reading from the file fd via pread. * Called after the kernel allocates the anonymous region and the host zeroes it. @@ -7325,6 +7770,300 @@ export class CentralizedKernelWorker { } } + /** + * Flush MAP_SHARED regions that overlap the msync/munmap range. + */ + private flushSharedMappings(channel: ChannelInfo, origArgs: number[]): void { + const syncAddr = origArgs[0] >>> 0; + const syncLen = origArgs[1] >>> 0; + const pidMap = this.sharedMappings.get(channel.pid); + if (!pidMap || pidMap.size === 0) return; + + const syncEnd = syncAddr + syncLen; + + for (const [mapAddr, mapping] of pidMap) { + if (!mapping.writable) continue; + const mapEnd = mapAddr + mapping.len; + // Check overlap + if (mapAddr >= syncEnd || mapEnd <= syncAddr) continue; + + // Compute overlap region + const flushStart = Math.max(syncAddr, mapAddr); + const flushEnd = Math.min(syncEnd, mapEnd); + const flushLen = flushEnd - flushStart; + if (flushLen <= 0) continue; + + const fileOffsetBase = mapping.fileOffset + (flushStart - mapAddr); + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (backing) { + this.flushBackingRange(backing, fileOffsetBase, flushLen); + } + } + } + + private flushBackingRange(backing: SharedMmapBacking, offset: number, len: number): boolean { + if (len <= 0 || backing.dirtyPages.size === 0) return true; + if (backing.handle < 0) { + backing.dirtyPages.clear(); + return true; + } + const end = offset + len; + let ok = true; + + for (const page of Array.from(backing.dirtyPages).sort((a, b) => a - b)) { + const pageStart = page * FILE_PAGE_SIZE; + const pageEnd = pageStart + FILE_PAGE_SIZE; + if (pageStart >= end || pageEnd <= offset) continue; + + const writeStart = Math.max(offset, pageStart); + const writeEnd = Math.min(end, pageEnd); + const pageData = this.ensureBackingPageLoaded(backing, page); + const source = pageData.subarray(writeStart - pageStart, writeEnd - pageStart); + if (!this.writeAllToBackingHandle(backing, source, writeStart)) { + ok = false; + continue; + } + if (writeStart <= pageStart && writeEnd >= pageEnd) { + backing.dirtyPages.delete(page); + } + } + return ok; + } + + private writeAllToBackingHandle( + backing: SharedMmapBacking, + source: Uint8Array, + fileOffset: number, + ): boolean { + let written = 0; + while (written < source.length) { + try { + const n = this.io.write( + backing.handle, + source.subarray(written), + fileOffset + written, + source.length - written, + ); + if (n <= 0) return false; + written += n; + } catch { + return false; + } + } + return true; + } + + private flushSharedMappingsBeforeFileSyscall( + channel: ChannelInfo, + syscallNr: number, + origArgs: number[], + ): void { + if (syscallNr === SYS_SENDFILE) { + this.flushSharedBackingForFd(channel, origArgs[0]); + this.flushSharedBackingForFd(channel, origArgs[1]); + return; + } + if (!this.syscallTouchesFdStorageBeforeKernel(syscallNr)) return; + this.flushSharedBackingForFd(channel, origArgs[0]); + } + + private flushSharedBackingForFd(channel: ChannelInfo, fd: number): void { + if (fd < 0) return; + const backing = this.findSharedBackingForFd(channel, fd); + if (backing && backing.dirtyPages.size > 0) { + this.flushBackingRange(backing, 0, Number.MAX_SAFE_INTEGER); + } + } + + private syscallTouchesFdStorageBeforeKernel(syscallNr: number): boolean { + return syscallNr === SYS_READ + || syscallNr === SYS_PREAD + || syscallNr === SYS_READV + || syscallNr === SYS_PREADV + || syscallNr === SYS_WRITE + || syscallNr === SYS_PWRITE + || syscallNr === SYS_WRITEV + || syscallNr === SYS_PWRITEV + || syscallNr === SYS_FSYNC + || syscallNr === SYS_FDATASYNC + || syscallNr === SYS_FTRUNCATE + || syscallNr === SYS_CLOSE; + } + + private handleSharedMappingsAfterFileSyscall( + channel: ChannelInfo, + syscallNr: number, + origArgs: number[], + retVal: number, + errVal: number, + ): void { + if (errVal !== 0) return; + if (syscallNr === SYS_CLOSE && retVal === 0) { + this.invalidateSharedMmapFdCache(channel.pid, origArgs[0]); + return; + } + if (syscallNr === SYS_DUP && retVal >= 0) { + this.invalidateSharedMmapFdCache(channel.pid, retVal); + return; + } + if ((syscallNr === SYS_DUP2 || syscallNr === SYS_DUP3) && retVal >= 0) { + this.invalidateSharedMmapFdCache(channel.pid, origArgs[1]); + return; + } + if (syscallNr === SYS_FCNTL && retVal >= 0) { + const cmd = origArgs[1] >>> 0; + if (cmd === F_DUPFD || cmd === F_DUPFD_CLOEXEC || cmd === F_DUPFD_CLOFORK) { + this.invalidateSharedMmapFdCache(channel.pid, retVal); + return; + } + } + if (syscallNr === SYS_PWRITE && retVal > 0) { + this.updateSharedBackingFromProcessBuffer( + channel, + origArgs[0], + origArgs[1] >>> 0, + retVal, + origArgs[3], + ); + return; + } + if (syscallNr === SYS_WRITE && retVal > 0) { + this.reloadSharedBackingForFd(channel, origArgs[0]); + return; + } + if ((syscallNr === SYS_WRITEV || syscallNr === SYS_PWRITEV) && retVal > 0) { + this.reloadSharedBackingForFd(channel, origArgs[0]); + return; + } + if (syscallNr === SYS_SENDFILE && retVal > 0) { + this.reloadSharedBackingForFd(channel, origArgs[0]); + return; + } + if (syscallNr === SYS_FTRUNCATE && retVal === 0) { + this.reloadSharedBackingForFd(channel, origArgs[0]); + } + } + + private syncSharedMappingsAfterDirectFileSyscall( + channel: ChannelInfo, + syscallNr: number, + origArgs: number[], + retVal: number, + errVal: number, + ): void { + this.handleSharedMappingsAfterFileSyscall(channel, syscallNr, origArgs, retVal, errVal); + this.syncSharedMappingsFromProcess(channel, false); + this.refreshSharedMappingsToProcess(channel, false); + } + + private updateSharedBackingFromProcessBuffer( + channel: ChannelInfo, + fd: number, + ptr: number, + len: number, + fileOffset: number, + ): void { + if (len <= 0) return; + const backing = this.findSharedBackingForFd(channel, fd); + if (!backing) return; + const processMem = new Uint8Array(channel.memory.buffer); + if (ptr + len > processMem.length) { + this.reloadSharedBackingRange(backing, fileOffset, len); + return; + } + this.copyRangeToBacking( + backing, + fileOffset, + processMem.subarray(ptr, ptr + len), + false, + ); + backing.version++; + } + + private reloadSharedBackingForFd(channel: ChannelInfo, fd: number): void { + const backing = this.findSharedBackingForFd(channel, fd); + if (!backing) return; + const loadedPages = Array.from(backing.pages.keys()); + if (loadedPages.length === 0) return; + for (const page of loadedPages) { + backing.pages.set(page, this.readBackingPageFromFile(backing, page)); + backing.dirtyPages.delete(page); + } + backing.version++; + } + + private reloadSharedBackingRange(backing: SharedMmapBacking, offset: number, len: number): void { + if (len <= 0) return; + const firstPage = Math.floor(offset / FILE_PAGE_SIZE); + const lastPage = Math.floor((offset + len - 1) / FILE_PAGE_SIZE); + let reloaded = false; + for (let page = firstPage; page <= lastPage; page++) { + if (!backing.pages.has(page)) continue; + backing.pages.set(page, this.readBackingPageFromFile(backing, page)); + backing.dirtyPages.delete(page); + reloaded = true; + } + if (reloaded) backing.version++; + } + + private findSharedBackingForFd(channel: ChannelInfo, fd: number): SharedMmapBacking | null { + if (this.sharedMmapBackings.size === 0) return null; + const cacheKey = this.sharedMmapFdCacheKey(channel.pid, fd); + const cached = this.sharedMmapFdCache.get(cacheKey); + if (cached) { + return cached.backingKey === null + ? null + : this.sharedMmapBackings.get(cached.backingKey) ?? null; + } + + const stat = this.getFdStatForSharedMapping(channel, fd); + if (stat?.key) { + const backing = this.sharedMmapBackings.get(stat.key); + if (backing) { + this.sharedMmapFdCache.set(cacheKey, { backingKey: backing.key }); + return backing; + } + } + const path = this.getFdPathForSharedMapping(channel, fd); + if (!path) { + this.sharedMmapFdCache.set(cacheKey, { backingKey: null }); + return null; + } + const backing = this.sharedMmapBackings.get(`path:${path}`) ?? null; + this.sharedMmapFdCache.set(cacheKey, { backingKey: backing?.key ?? null }); + return backing; + } + + private sharedMmapFdCacheKey(pid: number, fd: number): string { + return `${pid}:${fd}`; + } + + private invalidateSharedMmapFdCache(pid?: number, fd?: number): void { + if (pid === undefined || fd === undefined) { + this.sharedMmapFdCache.clear(); + return; + } + this.sharedMmapFdCache.delete(this.sharedMmapFdCacheKey(pid, fd)); + } + + private releaseSharedMapping(mapping: SharedMmapMapping): void { + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (!backing) return; + backing.refCount = Math.max(0, backing.refCount - 1); + if (backing.refCount > 0) return; + + this.flushBackingRange(backing, 0, Number.MAX_SAFE_INTEGER); + if (backing.handle >= 0) { + try { + this.io.close(backing.handle); + } catch { + // The kernel should not fail teardown because a host close raced. + } + } + this.sharedMmapBackings.delete(backing.key); + this.invalidateSharedMmapFdCache(); + } + /** * Remove shared mapping entries that overlap the munmap range. */ diff --git a/host/test/mmap-shared.test.ts b/host/test/mmap-shared.test.ts index 98c94be9b..e0aa64976 100644 --- a/host/test/mmap-shared.test.ts +++ b/host/test/mmap-shared.test.ts @@ -1,8 +1,22 @@ import { describe, it, expect } from "vitest"; +import { existsSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; import { runCentralizedProgram } from "./centralized-test-helper"; import { resolveBinary } from "../src/binary-resolver"; import { NodePlatformIO } from "../src/platform/node"; +const __dirname = dirname(fileURLToPath(import.meta.url)); +const repoRoot = join(__dirname, "../.."); +const crossProcessFixture = join(repoRoot, "examples/mmap_shared_cross_process.wasm"); +const anonymousForkFixture = join(repoRoot, "examples/mmap_shared_anonymous_fork.wasm"); +const munmapReuseFixture = join(repoRoot, "examples/mmap_shared_munmap_reuse.wasm"); +const largePwriteFixture = join(repoRoot, "examples/mmap_shared_large_pwrite.wasm"); +const itIfCrossProcessFixture = existsSync(crossProcessFixture) ? it : it.skip; +const itIfAnonymousForkFixture = existsSync(anonymousForkFixture) ? it : it.skip; +const itIfMunmapReuseFixture = existsSync(munmapReuseFixture) ? it : it.skip; +const itIfLargePwriteFixture = existsSync(largePwriteFixture) ? it : it.skip; + describe("MAP_SHARED mmap + msync", () => { it("writes through MAP_SHARED mapping and flushes with msync", async () => { const result = await runCentralizedProgram({ @@ -18,4 +32,50 @@ describe("MAP_SHARED mmap + msync", () => { expect(result.stdout).toContain("mremap ok"); expect(result.stdout).toContain("PASS"); }); + + itIfCrossProcessFixture("keeps file-backed MAP_SHARED mappings coherent across processes", async () => { + const result = await runCentralizedProgram({ + programPath: crossProcessFixture, + io: new NodePlatformIO(), + timeout: 10000, + }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("inherited mapping coherent"); + expect(result.stdout).toContain("separate mapping coherent"); + expect(result.stdout).toContain("PASS"); + }); + + itIfAnonymousForkFixture("keeps anonymous MAP_SHARED mappings coherent after fork", async () => { + const result = await runCentralizedProgram({ + programPath: anonymousForkFixture, + io: new NodePlatformIO(), + timeout: 10000, + }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("inherited anonymous mapping coherent"); + expect(result.stdout).toContain("reused anonymous backing coherent"); + expect(result.stdout).toContain("PASS"); + }); + + itIfMunmapReuseFixture("drops page-rounded MAP_SHARED mappings before anonymous address reuse", async () => { + const result = await runCentralizedProgram({ + programPath: munmapReuseFixture, + io: new NodePlatformIO(), + timeout: 10000, + }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("partial munmap cleanup ok"); + expect(result.stdout).toContain("PASS"); + }); + + itIfLargePwriteFixture("refreshes MAP_SHARED mappings after large pwrite", async () => { + const result = await runCentralizedProgram({ + programPath: largePwriteFixture, + io: new NodePlatformIO(), + timeout: 10000, + }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("large pwrite mapping coherent"); + expect(result.stdout).toContain("PASS"); + }); }); From 74f7375d6148cef4ee6dcd5367bbe5a1672790c7 Mon Sep 17 00:00:00 2001 From: Kandelo Agent Date: Mon, 15 Jun 2026 17:35:31 +0000 Subject: [PATCH 3/8] fix: honor mmap address hints (cherry picked from commit 079b0897b02006cf2771dab4f855eabdad81b62c) --- crates/kernel/src/memory.rs | 99 +++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/crates/kernel/src/memory.rs b/crates/kernel/src/memory.rs index d63cb47ac..408c4bd03 100644 --- a/crates/kernel/src/memory.rs +++ b/crates/kernel/src/memory.rs @@ -133,9 +133,27 @@ impl MemoryManager { }); hint } else { - // Find first gap in [mmap_base, max_addr) that fits aligned_len. - // Mappings are kept sorted by address. - match self.find_gap(aligned_len) { + // Without MAP_FIXED, POSIX treats a non-null address as a hint: + // the implementation may choose another address, but if the + // hinted page-aligned range is free and inside the normal mmap + // arena, using it preserves standard allocator expectations. In + // particular, allocators commonly try to extend a mapping by + // issuing mmap(old_end, delta, ...) without MAP_FIXED so the call + // fails safely instead of clobbering an occupied range. + let hinted_addr = if hint != 0 + && hint & 0xFFFF == 0 + && hint >= self.mmap_base.max(self.program_break) + && !self.overlaps_brk_heap(hint, aligned_len) + && self.can_grow_at(hint, aligned_len) + { + Some(hint) + } else { + None + }; + + // Otherwise find the first gap in [mmap_base, max_addr) that fits + // aligned_len. Mappings are kept sorted by address. + match hinted_addr.or_else(|| self.find_gap(aligned_len)) { Some(a) => a, None => return wasm_posix_shared::mmap::MAP_FAILED, } @@ -650,6 +668,81 @@ mod tests { assert_eq!(addr2, addr + 0x10000); } + #[test] + fn test_mmap_gap_scan_merges_guest_and_reserved_regions() { + let mut mm = MemoryManager::new(); + let rw = PROT_READ | PROT_WRITE; + let anon = MAP_PRIVATE | MAP_ANONYMOUS; + let base = MemoryManager::MMAP_BASE; + + // Interleave a guest mapping and a host-reserved control range. The + // automatic mmap scan must consider both address-ordered streams and + // return the first real gap, not a range that is free in only one list. + assert_eq!( + mm.mmap_anonymous(base, 0x10000, rw, anon | MAP_FIXED), + base + ); + assert_eq!( + mm.reserve_host_region_at(base + 0x10000, 0x10000), + base + 0x10000 + ); + + let addr = mm.mmap_anonymous(0, 0x10000, rw, anon); + assert_eq!(addr, base + 0x20000); + } + + #[test] + fn test_mmap_non_fixed_honors_free_address_hint() { + let mut mm = MemoryManager::new(); + let rw = PROT_READ | PROT_WRITE; + let anon = MAP_PRIVATE | MAP_ANONYMOUS; + let base = MemoryManager::MMAP_BASE; + + assert_eq!(mm.mmap_anonymous(base, 0x10000, rw, anon | MAP_FIXED), base); + assert_eq!( + mm.mmap_anonymous(base + 0x20000, 0x10000, rw, anon | MAP_FIXED), + base + 0x20000 + ); + + // There is an earlier first-fit gap at base+0x10000, but a non-fixed + // hint at base+0x30000 is free. Prefer the usable hint instead of + // treating non-fixed hints as if they were NULL. + assert_eq!( + mm.mmap_anonymous(base + 0x30000, 0x10000, rw, anon), + base + 0x30000 + ); + + // An occupied hint is only a hint; fall back to the ordinary first-fit + // address without replacing the existing mapping. + assert_eq!( + mm.mmap_anonymous(base + 0x20000, 0x10000, rw, anon), + base + 0x10000 + ); + assert!(mm.is_mapped(base + 0x20000)); + } + + #[test] + fn test_munmap_rounds_length_up_to_page() { + let mut mm = MemoryManager::new(); + let rw = PROT_READ | PROT_WRITE; + let anon = MAP_PRIVATE | MAP_ANONYMOUS; + + // SQLite sysfault.test allocates this size and later munmaps a + // different, still page-rounded length. Both must cover the same + // kernel mapping, otherwise a tiny tail fragment prevents reuse. + let requested_len = 0x1ea102e; + let munmap_len = 0x1ea2000; + + let addr = mm.mmap_anonymous(0, requested_len, rw, anon); + assert_ne!(addr, MAP_FAILED); + assert!(mm.munmap(addr, munmap_len)); + assert!(!mm.is_mapped(addr)); + assert!(!mm.is_mapped(addr + munmap_len)); + + let reused = mm.mmap_anonymous(0, requested_len, rw, anon); + assert_eq!(reused, addr); + } + #[test] fn test_brk() { let mut mm = MemoryManager::new(); From 940d8856853de630f71168676919fd2284dbe545 Mon Sep 17 00:00:00 2001 From: Kandelo Agent Date: Tue, 16 Jun 2026 01:40:02 +0000 Subject: [PATCH 4/8] fix: improve shared memory and fpm socket semantics (cherry picked from commit 2b33f627cc1dbdc5669c20809ae70e3f32856973) --- crates/kernel/src/socket.rs | 4 + crates/kernel/src/syscalls.rs | 219 ++++++++++++++++++++++--- crates/kernel/src/wasm_api.rs | 202 +++++++++++++++++++++-- host/src/kernel-worker.ts | 135 ++++++++++++--- host/test/select-timeout-retry.test.ts | 92 +++++++++++ 5 files changed, 586 insertions(+), 66 deletions(-) diff --git a/crates/kernel/src/socket.rs b/crates/kernel/src/socket.rs index 674cb6495..4b3690dec 100644 --- a/crates/kernel/src/socket.rs +++ b/crates/kernel/src/socket.rs @@ -544,6 +544,10 @@ impl SocketTable { /// A pending TCP connection waiting in a shared accept queue. pub struct PendingConnection { pub peer_addr: [u8; 4], + /// IPv6 peer address when this pending connection originated from an + /// AF_INET6 client. `None` on an AF_INET6 listener means the peer was an + /// IPv4 client and must be exposed as an IPv4-mapped IPv6 address. + pub peer_addr6: Option<[u8; 16]>, pub peer_port: u16, /// Recv pipe index (in the global pipe table). Host writes incoming /// TCP data here; the accepting process reads from it. diff --git a/crates/kernel/src/syscalls.rs b/crates/kernel/src/syscalls.rs index e03696bdb..99c23de01 100644 --- a/crates/kernel/src/syscalls.rs +++ b/crates/kernel/src/syscalls.rs @@ -6216,11 +6216,21 @@ fn is_loopback_addr(addr: [u8; 4]) -> bool { addr[0] == 127 } -fn is_loopback_addr6(addr: [u8; 16]) -> bool { +pub(crate) fn is_loopback_addr6(addr: [u8; 16]) -> bool { addr == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] } -fn is_unspecified_addr6(addr: [u8; 16]) -> bool { +pub(crate) fn loopback_addr6() -> [u8; 16] { + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] +} + +pub(crate) fn ipv4_mapped_addr6(addr: [u8; 4]) -> [u8; 16] { + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, addr[0], addr[1], addr[2], addr[3], + ] +} + +pub(crate) fn is_unspecified_addr6(addr: [u8; 16]) -> bool { addr == [0; 16] } @@ -8017,10 +8027,26 @@ pub fn sys_accept(proc: &mut Process, _host: &mut dyn HostIO, fd: i32) -> Result accepted.state = SocketState::Connected; accepted.recv_buf_idx = Some(pc.recv_pipe_idx); accepted.send_buf_idx = Some(pc.send_pipe_idx); - accepted.bind_addr = bind_addr; - accepted.bind_port = bind_port; - accepted.peer_addr = pc.peer_addr; - accepted.peer_port = pc.peer_port; + match domain { + SocketDomain::Inet => { + accepted.bind_addr = bind_addr; + accepted.bind_port = bind_port; + accepted.peer_addr = pc.peer_addr; + accepted.peer_port = pc.peer_port; + } + SocketDomain::Inet6 => { + accepted.bind_addr6 = bind_addr6; + accepted.bind_port = bind_port; + accepted.peer_addr6 = pc + .peer_addr6 + .unwrap_or_else(|| ipv4_mapped_addr6(pc.peer_addr)); + accepted.peer_port = pc.peer_port; + } + SocketDomain::Unix => { + accepted.bind_path = bind_path; + } + SocketDomain::Netlink => return Err(Errno::EOPNOTSUPP), + } accepted.global_pipes = true; let accepted_sock_idx = proc.sockets.alloc(accepted); let host_handle = -((accepted_sock_idx as i64) + 1); @@ -8478,33 +8504,55 @@ pub fn sys_connect( let pipe_a_idx = pipe_table.alloc(PipeBuffer::new(65536)); let pipe_b_idx = pipe_table.alloc(PipeBuffer::new(65536)); - // Create accepted socket (server side) - let mut accepted_sock = SocketInfo::new(SocketDomain::Unix, SocketType::Stream, 0); - accepted_sock.state = SocketState::Connected; - accepted_sock.recv_buf_idx = Some(pipe_a_idx); // reads client's writes - accepted_sock.send_buf_idx = Some(pipe_b_idx); // writes to client's reads - accepted_sock.global_pipes = true; - let accepted_idx = proc.sockets.alloc(accepted_sock); - - // Push to listener's backlog let listener = proc .sockets - .get_mut(listener_sock_idx) + .get(listener_sock_idx) .ok_or(Errno::EBADF)?; - listener.listen_backlog.push(accepted_idx); let accept_wake_idx = listener.accept_wake_idx; - // Set up client socket - let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; - client.send_buf_idx = Some(pipe_a_idx); // writes to pipe_a (server's reads) - client.recv_buf_idx = Some(pipe_b_idx); // reads from pipe_b (server's writes) - client.state = SocketState::Connected; - client.peer_idx = Some(accepted_idx); - client.global_pipes = true; + if let Some(shared_idx) = listener.shared_backlog_idx { + let pc = crate::socket::PendingConnection { + peer_addr: [0, 0, 0, 0], + peer_addr6: None, + peer_port: 0, + recv_pipe_idx: pipe_a_idx, // server reads client's writes + send_pipe_idx: pipe_b_idx, // server writes to client's reads + }; + if !unsafe { crate::socket::shared_listener_backlog_table().push(shared_idx, pc) } + { + return Err(Errno::ECONNREFUSED); + } + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.send_buf_idx = Some(pipe_a_idx); + client.recv_buf_idx = Some(pipe_b_idx); + client.state = SocketState::Connected; + client.global_pipes = true; + } else { + // Legacy fallback for listeners without a shared queue. + let mut accepted_sock = + SocketInfo::new(SocketDomain::Unix, SocketType::Stream, 0); + accepted_sock.state = SocketState::Connected; + accepted_sock.recv_buf_idx = Some(pipe_a_idx); + accepted_sock.send_buf_idx = Some(pipe_b_idx); + accepted_sock.global_pipes = true; + let accepted_idx = proc.sockets.alloc(accepted_sock); + + let listener = proc + .sockets + .get_mut(listener_sock_idx) + .ok_or(Errno::EBADF)?; + listener.listen_backlog.push(accepted_idx); - // Set peer_idx on accepted socket - let accepted = proc.sockets.get_mut(accepted_idx).ok_or(Errno::EBADF)?; - accepted.peer_idx = Some(sock_idx); + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.send_buf_idx = Some(pipe_a_idx); + client.recv_buf_idx = Some(pipe_b_idx); + client.state = SocketState::Connected; + client.peer_idx = Some(accepted_idx); + client.global_pipes = true; + + let accepted = proc.sockets.get_mut(accepted_idx).ok_or(Errno::EBADF)?; + accepted.peer_idx = Some(sock_idx); + } if let Some(idx) = accept_wake_idx { crate::wakeup::push_accept(idx); @@ -19085,6 +19133,123 @@ mod tests { ); } + #[test] + fn test_inet6_accept_reports_ipv4_mapped_and_native_ipv6_peers() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use crate::pipe::PipeBuffer; + use wasm_posix_shared::socket::*; + + let fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_STREAM, 0).unwrap(); + let mut addr = [0u8; 28]; + addr[0] = 10; // AF_INET6 + addr[2] = 0x19; + addr[3] = 0x46; // port 6470 + // Bind to :: so the listener is dual-stack. IPv4 peers accepted on an + // AF_INET6 socket are observed through getpeername(2) as + // IPv4-mapped IPv6 addresses; native IPv6 peers remain native. + sys_bind(&mut proc, &mut host, fd, &addr).unwrap(); + sys_listen(&mut proc, &mut host, fd, 5).unwrap(); + + let listener_entry = proc.fd_table.get(fd).unwrap(); + let listener_ofd = proc.ofd_table.get(listener_entry.ofd_ref.0).unwrap(); + let listener_idx = (-(listener_ofd.host_handle + 1)) as usize; + let shared_idx = proc + .sockets + .get(listener_idx) + .unwrap() + .shared_backlog_idx + .unwrap(); + + let pipe_table = unsafe { crate::pipe::global_pipe_table() }; + let (recv4, send4) = + pipe_table.alloc_pair(PipeBuffer::new(1024), PipeBuffer::new(1024)); + unsafe { crate::socket::shared_listener_backlog_table() }.push( + shared_idx, + crate::socket::PendingConnection { + peer_addr: [127, 0, 0, 1], + peer_addr6: None, + peer_port: 50000, + recv_pipe_idx: recv4, + send_pipe_idx: send4, + }, + ); + let accepted4_fd = sys_accept(&mut proc, &mut host, fd).unwrap(); + let accepted4_entry = proc.fd_table.get(accepted4_fd).unwrap(); + let accepted4_ofd = proc.ofd_table.get(accepted4_entry.ofd_ref.0).unwrap(); + let accepted4_idx = (-(accepted4_ofd.host_handle + 1)) as usize; + assert_eq!( + proc.sockets.get(accepted4_idx).unwrap().peer_addr6, + ipv4_mapped_addr6([127, 0, 0, 1]), + ); + + let (recv6, send6) = + pipe_table.alloc_pair(PipeBuffer::new(1024), PipeBuffer::new(1024)); + unsafe { crate::socket::shared_listener_backlog_table() }.push( + shared_idx, + crate::socket::PendingConnection { + peer_addr: [0, 0, 0, 0], + peer_addr6: Some(loopback_addr6()), + peer_port: 50001, + recv_pipe_idx: recv6, + send_pipe_idx: send6, + }, + ); + let accepted6_fd = sys_accept(&mut proc, &mut host, fd).unwrap(); + let accepted6_entry = proc.fd_table.get(accepted6_fd).unwrap(); + let accepted6_ofd = proc.ofd_table.get(accepted6_entry.ofd_ref.0).unwrap(); + let accepted6_idx = (-(accepted6_ofd.host_handle + 1)) as usize; + assert_eq!(proc.sockets.get(accepted6_idx).unwrap().peer_addr6, loopback_addr6()); + } + + #[test] + fn test_inet6_loopback_listen_registers_host_transport() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + use wasm_posix_shared::socket::*; + + let fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_STREAM, 0).unwrap(); + let mut addr = [0u8; 28]; + addr[0] = 10; // AF_INET6 + addr[2] = 0x19; + addr[3] = 0x44; // port 6468 + addr[23] = 1; // ::1 + + sys_bind(&mut proc, &mut host, fd, &addr).unwrap(); + sys_listen(&mut proc, &mut host, fd, 5).unwrap(); + + assert_eq!(host.net_listen_calls, vec![(fd, 6468, [127, 0, 0, 1])]); + } + + #[test] + fn test_inet6_loopback_cross_process_connect_uses_host_transport() { + let mut proc = Process::new(1); + let mut host = MockHostIO::new(); + host.net_connect_result = Ok(()); + host.net_connect_status_result = Ok(()); + use wasm_posix_shared::socket::*; + + let fd = sys_socket(&mut proc, &mut host, AF_INET6, SOCK_STREAM, 0).unwrap(); + let mut addr = [0u8; 28]; + addr[0] = 10; // AF_INET6 + addr[2] = 0x19; + addr[3] = 0x45; // port 6469 + addr[23] = 1; // ::1 + + sys_connect(&mut proc, &mut host, fd, &addr).unwrap(); + + assert_eq!(host.net_connect_calls, vec![(0, vec![127, 0, 0, 1], 6469)]); + let entry = proc.fd_table.get(fd).unwrap(); + let ofd = proc.ofd_table.get(entry.ofd_ref.0).unwrap(); + let sock_idx = (-(ofd.host_handle + 1)) as usize; + let sock = proc.sockets.get(sock_idx).unwrap(); + assert_eq!(sock.state, crate::socket::SocketState::Connected); + assert_eq!(sock.peer_addr6, loopback_addr6()); + assert_eq!(sock.peer_port, 6469); + assert_eq!(sock.bind_addr6, loopback_addr6()); + assert_ne!(sock.bind_port, 0); + } + // ── Threading tests ────────────────────────────────────────────── #[test] diff --git a/crates/kernel/src/wasm_api.rs b/crates/kernel/src/wasm_api.rs index 5730eccd0..b7fec50fa 100644 --- a/crates/kernel/src/wasm_api.rs +++ b/crates/kernel/src/wasm_api.rs @@ -7181,6 +7181,28 @@ pub extern "C" fn kernel_connect(fd: i32, addr_ptr: *const u8, addr_len: u32) -> let (_gkl, proc) = unsafe { get_process() }; let mut host = WasmHostIO; let addr = unsafe { slice::from_raw_parts(addr_ptr, addr_len as usize) }; + if crate::is_centralized_mode() && addr_len >= 28 { + let family = u16::from_le_bytes([addr[0], addr[1]]); + if family == 10 { + let mut ip6 = [0u8; 16]; + ip6.copy_from_slice(&addr[8..24]); + if crate::syscalls::is_loopback_addr6(ip6) + || crate::syscalls::is_unspecified_addr6(ip6) + { + match cross_process_loopback_connect6(proc, fd, addr) { + Ok(()) => { + deliver_pending_signals(proc, &mut host); + return 0; + } + Err(Errno::ECONNREFUSED) => {} + Err(e) => { + deliver_pending_signals(proc, &mut host); + return -(e as i32); + } + } + } + } + } let result = match syscalls::sys_connect(proc, &mut host, fd, addr) { Ok(()) => 0, Err(Errno::ECONNREFUSED) if addr_len >= 3 => { @@ -7261,7 +7283,10 @@ fn cross_process_loopback_connect(proc: &mut Process, fd: i32, addr: &[u8]) -> R if s.state == SocketState::Listening && s.bind_port == port && s.sock_type == SocketType::Stream - && (s.bind_addr == [0, 0, 0, 0] || s.bind_addr == [127, 0, 0, 1]) + && ((s.domain == crate::socket::SocketDomain::Inet + && (s.bind_addr == [0, 0, 0, 0] || s.bind_addr == [127, 0, 0, 1])) + || (s.domain == crate::socket::SocketDomain::Inet6 + && crate::syscalls::is_unspecified_addr6(s.bind_addr6))) { listener_pid = Some(pid); listener_sock_idx = Some(idx); @@ -7330,6 +7355,7 @@ fn cross_process_loopback_connect(proc: &mut Process, fd: i32, addr: &[u8]) -> R let pc = crate::socket::PendingConnection { peer_addr: client_addr, + peer_addr6: None, peer_port: client_port, recv_pipe_idx: pipe_a_idx, // server reads client's writes send_pipe_idx: pipe_b_idx, // server writes to client's reads @@ -7345,7 +7371,136 @@ fn cross_process_loopback_connect(proc: &mut Process, fd: i32, addr: &[u8]) -> R Ok(()) } -/// Cross-process AF_UNIX connect. +/// Cross-process loopback TCP connect for AF_INET6 (centralized mode only). +/// +/// Searches all processes for a matching AF_INET6 listener and queues a +/// pending connection carrying the real IPv6 peer address. This avoids routing +/// guest ::1 connections through the host IPv4 bridge, where they are +/// indistinguishable from IPv4 clients and get reported to acceptors as the +/// wrong peer address. +fn cross_process_loopback_connect6(proc: &mut Process, fd: i32, addr: &[u8]) -> Result<(), Errno> { + use crate::pipe::PipeBuffer; + use crate::socket::{SocketDomain, SocketState, SocketType}; + + if addr.len() < 28 { + return Err(Errno::EINVAL); + } + let port = u16::from_be_bytes([addr[2], addr[3]]); + let mut ip6 = [0u8; 16]; + ip6.copy_from_slice(&addr[8..24]); + let dst_ip6 = if crate::syscalls::is_unspecified_addr6(ip6) { + crate::syscalls::loopback_addr6() + } else { + ip6 + }; + + let entry = proc.fd_table.get(fd)?; + let ofd = proc.ofd_table.get(entry.ofd_ref.0).ok_or(Errno::EBADF)?; + let sock_idx = (-(ofd.host_handle + 1)) as usize; + + { + let sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + if sock.sock_type == SocketType::Dgram { + return Err(Errno::ECONNREFUSED); + } + if sock.sock_type != SocketType::Stream || sock.domain != SocketDomain::Inet6 { + return Err(Errno::EOPNOTSUPP); + } + } + + let table = unsafe { &mut *PROCESS_TABLE.0.get() }; + let my_pid = proc.pid; + let mut listener_pid: Option = None; + let mut listener_sock_idx: Option = None; + + for (&pid, target_proc) in table.processes.iter().rev() { + if pid == my_pid { + continue; + } + for idx in 0..target_proc.sockets.len() { + if let Some(s) = target_proc.sockets.get(idx) { + if s.domain == SocketDomain::Inet6 + && s.state == SocketState::Listening + && s.bind_port == port + && s.sock_type == SocketType::Stream + && (crate::syscalls::is_unspecified_addr6(s.bind_addr6) + || s.bind_addr6 == dst_ip6) + { + listener_pid = Some(pid); + listener_sock_idx = Some(idx); + break; + } + } + } + if listener_pid.is_some() { + break; + } + } + + let listener_pid = listener_pid.ok_or(Errno::ECONNREFUSED)?; + let listener_sock_idx = listener_sock_idx.ok_or(Errno::ECONNREFUSED)?; + + let pipe_table = unsafe { crate::pipe::global_pipe_table() }; + let pipe_a_idx = pipe_table.alloc(PipeBuffer::new(65536)); + let pipe_b_idx = pipe_table.alloc(PipeBuffer::new(65536)); + + let proc = table.get_mut(my_pid).ok_or(Errno::ESRCH)?; + let client_sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + let client_addr6 = if crate::syscalls::is_unspecified_addr6(client_sock.bind_addr6) { + crate::syscalls::loopback_addr6() + } else { + client_sock.bind_addr6 + }; + let mut client_port = client_sock.bind_port; + if client_port == 0 { + client_port = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } + } + + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.send_buf_idx = Some(pipe_a_idx); + client.recv_buf_idx = Some(pipe_b_idx); + client.state = SocketState::Connected; + client.peer_addr6 = dst_ip6; + client.peer_port = port; + client.global_pipes = true; + if client.bind_port == 0 { + client.bind_port = client_port; + client.bind_addr6 = client_addr6; + } + + let listener_proc = table.get_mut(listener_pid).ok_or(Errno::ESRCH)?; + let listener_sock = listener_proc + .sockets + .get(listener_sock_idx) + .ok_or(Errno::ECONNREFUSED)?; + let shared_idx = listener_sock + .shared_backlog_idx + .ok_or(Errno::ECONNREFUSED)?; + let accept_wake_idx = listener_sock.accept_wake_idx; + + let pc = crate::socket::PendingConnection { + peer_addr: [0, 0, 0, 0], + peer_addr6: Some(client_addr6), + peer_port: client_port, + recv_pipe_idx: pipe_a_idx, + send_pipe_idx: pipe_b_idx, + }; + if !unsafe { crate::socket::shared_listener_backlog_table().push(shared_idx, pc) } { + return Err(Errno::ECONNREFUSED); + } + + if let Some(idx) = accept_wake_idx { + crate::wakeup::push_accept(idx); + } + + Ok(()) +} + +/// Cross-process AF_UNIX connect (centralized mode only). /// /// Looks up the target path in the global UnixSocketRegistry, then creates /// global pipe pairs to connect the client (current process) to the listener @@ -7405,21 +7560,41 @@ fn cross_process_unix_connect(proc: &mut Process, fd: i32, addr: &[u8]) -> Resul return Err(Errno::ECONNREFUSED); } - // Create accepted socket in the listener's process - let mut accepted_sock = SocketInfo::new(SocketDomain::Unix, SocketType::Stream, 0); - accepted_sock.state = SocketState::Connected; - accepted_sock.recv_buf_idx = Some(pipe_a_idx); - accepted_sock.send_buf_idx = Some(pipe_b_idx); - accepted_sock.global_pipes = true; - let accepted_idx = listener_proc.sockets.alloc(accepted_sock); - - // Push to listener's backlog + // Queue the connection on the listener's shared accept queue when + // available. POSIX listener fds inherited across fork/spawn share the same + // underlying socket queue, so the accepted socket must be materialized in + // whichever process actually calls accept(). let listener = listener_proc .sockets - .get_mut(listener_sock_idx) + .get(listener_sock_idx) .ok_or(Errno::EBADF)?; - listener.listen_backlog.push(accepted_idx); let accept_wake_idx = listener.accept_wake_idx; + if let Some(shared_idx) = listener.shared_backlog_idx { + let pc = crate::socket::PendingConnection { + peer_addr: [0, 0, 0, 0], + peer_addr6: None, + peer_port: 0, + recv_pipe_idx: pipe_a_idx, + send_pipe_idx: pipe_b_idx, + }; + if !unsafe { crate::socket::shared_listener_backlog_table().push(shared_idx, pc) } { + return Err(Errno::ECONNREFUSED); + } + } else { + // Legacy fallback for listeners without a shared queue. + let mut accepted_sock = SocketInfo::new(SocketDomain::Unix, SocketType::Stream, 0); + accepted_sock.state = SocketState::Connected; + accepted_sock.recv_buf_idx = Some(pipe_a_idx); + accepted_sock.send_buf_idx = Some(pipe_b_idx); + accepted_sock.global_pipes = true; + let accepted_idx = listener_proc.sockets.alloc(accepted_sock); + + let listener = listener_proc + .sockets + .get_mut(listener_sock_idx) + .ok_or(Errno::EBADF)?; + listener.listen_backlog.push(accepted_idx); + } // Set up client socket (in current process) let client_proc = table.get_mut(my_pid).ok_or(Errno::ESRCH)?; @@ -9918,6 +10093,7 @@ pub extern "C" fn kernel_inject_connection( peer_addr_c as u8, peer_addr_d as u8, ], + peer_addr6: None, peer_port: peer_port as u16, recv_pipe_idx, send_pipe_idx, diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index 47fd3c264..c6e53546d 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -2687,10 +2687,10 @@ export class CentralizedKernelWorker { * info to the process channel. The glue code (channel_syscall.c) reads * this after the syscall returns and invokes the handler. */ - private dequeueSignalForDelivery(channel: ChannelInfo): void { + private dequeueSignalForDelivery(channel: ChannelInfo): number { const dequeueSignal = this.kernelInstance!.exports .kernel_dequeue_signal as ((pid: number, outPtr: KernelPointer) => number) | undefined; - if (!dequeueSignal) return; + if (!dequeueSignal) return 0; // Use the signal area in kernel scratch as the output buffer const sigOutOffset = this.scratchOffset + CH_SIG_BASE; @@ -2705,10 +2705,12 @@ export class CentralizedKernelWorker { kernelMem.subarray(sigOutOffset, sigOutOffset + 44), channel.channelOffset + CH_SIG_BASE, ); + return sigResult; } else { // Clear entire signal delivery area in process channel (48 bytes) const sigStart = channel.channelOffset + CH_SIG_BASE; new Uint8Array(channel.memory.buffer, sigStart, 48).fill(0); + return 0; } } @@ -2953,6 +2955,9 @@ export class CentralizedKernelWorker { * Used for thread exit where we need to unblock the worker. */ private completeChannelRaw(channel: ChannelInfo, retVal: number, errVal: number): void { + this.syncSharedMappingsFromProcess(channel, true); + this.refreshSharedMappingsToProcess(channel); + // Clear handling flag (channel is done — poller can pick it up for next syscall) channel.handling = false; @@ -3675,6 +3680,12 @@ export class CentralizedKernelWorker { ): void { if (!this.processes.has(channel.pid)) return; + // EAGAIN-driven host waits park the process without normal syscall + // completion. Publish MAP_SHARED writes before parking so other processes + // observe standard shared-memory visibility while this thread blocks. + this.syncSharedMappingsFromProcess(channel, true); + this.refreshSharedMappingsToProcess(channel); + // Futex wait: use Atomics.waitAsync on the target address in process memory if (syscallNr === SYS_FUTEX) { const futexOp = origArgs[1] & 0x7f; // mask out FUTEX_PRIVATE_FLAG @@ -4065,6 +4076,13 @@ export class CentralizedKernelWorker { } if (delayMs > 0) { + // A host-delayed sleep parks the process without going through normal + // completeChannel(). Treat that park as a syscall boundary for + // MAP_SHARED: writes made before nanosleep/usleep must be visible to + // peer processes while this thread sleeps. + this.syncSharedMappingsFromProcess(channel, true); + this.refreshSharedMappingsToProcess(channel); + const timer = setTimeout(() => { this.pendingSleeps.delete(channel.pid); if (this.processes.has(channel.pid)) { @@ -4759,8 +4777,25 @@ export class CentralizedKernelWorker { const retVal = Number(kernelView.getBigInt64(CH_RETURN, true)); const errVal = kernelView.getUint32(CH_ERRNO, true); - // Handle signal delivery - this.dequeueSignalForDelivery(channel); + // Handle signal delivery. This host-side epoll emulation calls the + // kernel's poll helper with timeout=0 and then decides whether to + // block/retry in TypeScript. POSIX still requires a caught signal to + // interrupt epoll_wait/epoll_pwait with EINTR so user code can run the + // handler before re-entering the wait. Without completing the channel + // here, a process with a queued handler signal can stay parked in the + // host retry loop indefinitely. + const deliveredSignal = this.dequeueSignalForDelivery(channel); + const getExitStatus = this.kernelInstance!.exports + .kernel_get_process_exit_status as ((pid: number) => number) | undefined; + if (getExitStatus && getExitStatus(channel.pid) >= 128) { + this.handleProcessTerminated(channel); + return; + } + if (deliveredSignal > 0) { + this.completeChannelRaw(channel, -EINTR_ERRNO, EINTR_ERRNO); + this.relistenChannel(channel); + return; + } // If poll returned error (not EAGAIN), propagate it if (retVal < 0 && errVal !== EAGAIN) { @@ -7542,24 +7577,15 @@ export class CentralizedKernelWorker { } private syscallSynchronizesAnonymousSharedMemory(syscallNr: number): boolean { - return syscallNr === SYS_FORK - || syscallNr === SYS_VFORK - || syscallNr === SYS_CLONE - || syscallNr === SYS_EXIT - || syscallNr === SYS_EXIT_GROUP - || syscallNr === SYS_WAIT4 - || syscallNr === SYS_WAITID - || syscallNr === SYS_FUTEX - || syscallNr === SYS_POLL - || syscallNr === SYS_PPOLL - || syscallNr === SYS_SELECT - || syscallNr === SYS_PSELECT6 - || syscallNr === SYS_EPOLL_WAIT - || syscallNr === SYS_EPOLL_PWAIT - || syscallNr === SYS_RT_SIGTIMEDWAIT - || syscallNr === SYS_MSYNC - || syscallNr === SYS_MUNMAP - || syscallNr === SYS_MREMAP; + // Anonymous MAP_SHARED mappings are ordinary shared memory: writes made by + // a process before it enters the kernel must be visible to peers that + // subsequently enter the kernel. Centralized Kandelo processes use + // separate Wasm memories, so every syscall boundary for the *current* + // process is our coherence point. This intentionally does not scrape + // other live processes from a peer's syscall; doing so can publish + // mid-update shared-memory state that the writer has not synchronized. + void syscallNr; + return true; } private syncSharedMappingsFromProcess(channel: ChannelInfo, includeAnonymous = true): void { @@ -7586,10 +7612,23 @@ export class CentralizedKernelWorker { )) { continue; } - const bytes = processMem.subarray(mapAddr + offset, mapAddr + offset + n); - this.copyRangeToBacking(backing, mapping.fileOffset + offset, bytes, true); - mapping.snapshot.set(bytes, offset); - changed = true; + // Copy only bytes this process actually changed relative to its last + // shared-memory snapshot. MAP_SHARED mappings can be modified by + // multiple processes between syscall boundaries. Copying an entire + // host page from one process when it changed only a small field would + // overwrite disjoint writes already published by another process with + // this process's stale view of that page. + if (this.copyChangedSharedMappingRanges( + backing, + processMem, + mapAddr + offset, + mapping.snapshot, + offset, + mapping.fileOffset + offset, + n, + )) { + changed = true; + } } if (changed) { @@ -7599,6 +7638,50 @@ export class CentralizedKernelWorker { } } + private copyChangedSharedMappingRanges( + backing: SharedMmapBacking, + processMem: Uint8Array, + processOffset: number, + snapshot: Uint8Array, + snapshotOffset: number, + backingOffset: number, + len: number, + ): boolean { + let changed = false; + let i = 0; + while (i < len) { + while ( + i < len + && processMem[processOffset + i] === snapshot[snapshotOffset + i] + ) { + i++; + } + if (i >= len) break; + + const runStart = i; + do { + i++; + } while ( + i < len + && processMem[processOffset + i] !== snapshot[snapshotOffset + i] + ); + + const bytes = processMem.subarray( + processOffset + runStart, + processOffset + i, + ); + this.copyRangeToBacking( + backing, + backingOffset + runStart, + bytes, + true, + ); + snapshot.set(bytes, snapshotOffset + runStart); + changed = true; + } + return changed; + } + private refreshSharedMappingsToProcess(channel: ChannelInfo, includeAnonymous = true): void { const pidMap = this.sharedMappings.get(channel.pid); if (!pidMap || pidMap.size === 0) return; diff --git a/host/test/select-timeout-retry.test.ts b/host/test/select-timeout-retry.test.ts index 4bf49a426..4656d4845 100644 --- a/host/test/select-timeout-retry.test.ts +++ b/host/test/select-timeout-retry.test.ts @@ -63,6 +63,97 @@ describe("centralized select/pselect timeout retries", () => { 0, ); }); + + it("interrupts host-side epoll_pwait emulation when a handler signal is pending", () => { + const kernelMemory = createSharedMemory(); + const processMemory = createSharedMemory(); + const scratchOffset = 128; + const handleChannel = vi.fn(() => { + const kernelView = new DataView(kernelMemory.buffer, scratchOffset); + kernelView.setBigInt64(CH_RETURN, 0n, true); + kernelView.setUint32(CH_ERRNO, 0, true); + return 0; + }); + const worker = createWorkerHarness({ + kernel_handle_channel: handleChannel, + kernel_get_process_exit_status: () => 0, + }); + worker.kernelMemory = kernelMemory; + worker.scratchOffset = scratchOffset; + worker.epollInterests = new Map([ + ["42:7", [{ fd: 3, events: 0x001, data: 99n }]], + ]); + worker.dequeueSignalForDelivery = vi.fn(() => 15); + worker.completeChannelRaw = vi.fn(); + worker.relistenChannel = vi.fn(); + + const channel = createChannel(42, processMemory); + worker.processes = new Map([ + [42, { pid: 42, memory: processMemory, channels: [channel], ptrWidth: 4 }], + ]); + worker.activeChannels = [channel]; + + worker.handleEpollPwait(channel, 241, [7, 4096, 1, 1000, 0, 8]); + + expect(worker.dequeueSignalForDelivery).toHaveBeenCalledWith(channel); + expect(worker.completeChannelRaw).toHaveBeenCalledWith(channel, -4, 4); + expect(worker.relistenChannel).toHaveBeenCalledWith(channel); + expect(worker.pendingPollRetries.size).toBe(0); + }); + + it("merges disjoint MAP_SHARED writes from live processes", () => { + const worker = createWorkerHarness({}); + const mem1 = createSharedMemory(); + const mem2 = createSharedMemory(); + const addr = 1024; + const len = 16; + const key = "anon:test"; + const backing = { + key, + path: "", + handle: -1, + anonymous: true, + writable: true, + pages: new Map(), + dirtyPages: new Set(), + refCount: 2, + version: 0, + }; + const makeMapping = () => ({ + fd: -1, + fileOffset: 0, + len, + writable: true, + backingKey: key, + snapshot: new Uint8Array(len), + version: 0, + }); + + new Uint8Array(mem1.buffer)[addr] = "A".charCodeAt(0); + new Uint8Array(mem2.buffer)[addr + 1] = "B".charCodeAt(0); + const channel1 = createChannel(1, mem1); + const channel2 = createChannel(2, mem2); + worker.processes = new Map([ + [1, { pid: 1, memory: mem1, channels: [channel1], ptrWidth: 4 }], + [2, { pid: 2, memory: mem2, channels: [channel2], ptrWidth: 4 }], + ]); + worker.sharedMmapBackings = new Map([[key, backing]]); + worker.sharedMappings = new Map([ + [1, new Map([[addr, makeMapping()]])], + [2, new Map([[addr, makeMapping()]])], + ]); + + worker.syncSharedMappingsFromProcess(channel1, true); + worker.syncSharedMappingsFromProcess(channel2, true); + const latest = worker.readBackingRange(backing, 0, len); + expect(String.fromCharCode(latest[0], latest[1])).toBe("AB"); + + worker.refreshSharedMappingsToProcess(channel1, true); + expect(String.fromCharCode( + new Uint8Array(mem1.buffer)[addr], + new Uint8Array(mem1.buffer)[addr + 1], + )).toBe("AB"); + }); }); function createWorkerHarness(exports: Record): any { @@ -97,6 +188,7 @@ function createWorkerHarness(exports: Record): any { tcpListenerTargets: new Map(), tcpListenerRRIndex: new Map(), sharedMappings: new Map(), + sharedMmapBackings: new Map(), tcpConnections: new Map(), shmMappings: new Map(), usePolling: false, From 5368b790213d933296db187c5f2b342358aa5a8c Mon Sep 17 00:00:00 2001 From: Brandon Payton Date: Fri, 19 Jun 2026 02:23:04 -0400 Subject: [PATCH 5/8] fix: restore fork socket exec reader --- crates/kernel/src/fork.rs | 123 +++++++++++++++++++++++++++++ crates/kernel/src/process_table.rs | 2 +- 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/crates/kernel/src/fork.rs b/crates/kernel/src/fork.rs index 4f4925423..8898090d7 100644 --- a/crates/kernel/src/fork.rs +++ b/crates/kernel/src/fork.rs @@ -1232,6 +1232,129 @@ pub fn deserialize_fork_state(buf: &[u8], child_pid: u32) -> Result) -> Result { + let mut sockets = SocketTable::new(); + if r.remaining() < 8 { + return Ok(sockets); + } + + use crate::socket::{SocketDomain, SocketInfo, SocketState, SocketType}; + let _total_slots = r.read_u32()? as usize; + let sock_count = r.read_u32()? as usize; + for _ in 0..sock_count { + let idx = r.read_u32()? as usize; + let domain = match r.read_u32()? { + 0 => SocketDomain::Unix, + 1 => SocketDomain::Inet, + 2 => SocketDomain::Inet6, + 3 => SocketDomain::Netlink, + _ => return Err(Errno::EINVAL), + }; + let sock_type = match r.read_u32()? { + 0 => SocketType::Stream, + 1 => SocketType::Dgram, + _ => return Err(Errno::EINVAL), + }; + let protocol = r.read_u32()?; + let state = match r.read_u32()? { + 0 => SocketState::Unbound, + 1 => SocketState::Bound, + 2 => SocketState::Listening, + 3 => SocketState::Connected, + 4 => SocketState::Closed, + _ => return Err(Errno::EINVAL), + }; + let peer_idx_raw = r.read_u32()?; + let peer_idx = if peer_idx_raw == 0xFFFFFFFF { + None + } else { + Some(peer_idx_raw as usize) + }; + let recv_raw = r.read_u32()?; + let recv_buf_idx = if recv_raw == 0xFFFFFFFF { + None + } else { + Some(recv_raw as usize) + }; + let send_raw = r.read_u32()?; + let send_buf_idx = if send_raw == 0xFFFFFFFF { + None + } else { + Some(send_raw as usize) + }; + let shut_rd = r.read_u32()? != 0; + let shut_wr = r.read_u32()? != 0; + let host_handle_raw = r.read_u32()?; + let host_net_handle = if host_handle_raw == 0xFFFFFFFF { + None + } else { + Some(host_handle_raw as i32) + }; + + let opt_count = r.read_u32()? as usize; + let mut options = Vec::new(); + for _ in 0..opt_count { + let level = r.read_u32()?; + let optname = r.read_u32()?; + let value = r.read_u32()?; + options.push((level, optname, value)); + } + + let mut bind_addr = [0u8; 4]; + bind_addr.copy_from_slice(r.read_bytes(4)?); + let bind_port = r.read_u32()? as u16; + let mut peer_addr = [0u8; 4]; + peer_addr.copy_from_slice(r.read_bytes(4)?); + let peer_port = r.read_u32()? as u16; + + let backlog_count = r.read_u32()? as usize; + for _ in 0..backlog_count { + let _ = r.read_u32()?; + } + + let mut sock = SocketInfo::new(domain, sock_type, protocol); + sock.state = state; + sock.peer_idx = peer_idx; + sock.recv_buf_idx = recv_buf_idx; + sock.send_buf_idx = send_buf_idx; + sock.shut_rd = shut_rd; + sock.shut_wr = shut_wr; + sock.host_net_handle = host_net_handle; + sock.options = options; + sock.bind_addr = bind_addr; + sock.bind_port = bind_port; + sock.peer_addr = peer_addr; + sock.peer_port = peer_port; + sock.global_pipes = r.read_u32()? != 0; + + let shared_backlog_raw = r.read_u32()?; + sock.shared_backlog_idx = if shared_backlog_raw == 0xFFFFFFFF { + None + } else { + Some(shared_backlog_raw as usize) + }; + + if r.remaining() >= 4 { + let bind_path_len = r.read_u32()?; + if bind_path_len != 0xFFFFFFFF { + sock.bind_path = Some(r.read_bytes(bind_path_len as usize)?.to_vec()); + } + } + if r.remaining() >= 4 { + let accept_wake_raw = r.read_u32()?; + sock.accept_wake_idx = if accept_wake_raw == 0xFFFFFFFF { + None + } else { + Some(accept_wake_raw) + }; + } + sockets.insert_at(idx, sock); + } + + Ok(sockets) +} + // ── Exec Serialize ────────────────────────────────────────────────────────── /// Serialize the process state into a binary buffer for exec. diff --git a/crates/kernel/src/process_table.rs b/crates/kernel/src/process_table.rs index 266ef9193..8cbc37831 100644 --- a/crates/kernel/src/process_table.rs +++ b/crates/kernel/src/process_table.rs @@ -949,7 +949,7 @@ mod wait_tests { let mut table = ProcessTable::new(); let first_pid = table.allocate_spawn_pid(); - table.processes.insert(first_pid, Process::new_boxed(first_pid)); + table.processes.insert(first_pid, *Process::new_boxed(first_pid)); table.processes.remove(&first_pid); let second_pid = table.allocate_spawn_pid(); From 8496bd04c35220aa60ffa218f0c73043aa1876ce Mon Sep 17 00:00:00 2001 From: Kandelo Agent Date: Mon, 15 Jun 2026 10:05:52 +0000 Subject: [PATCH 6/8] fix: support fork from dlopened wasm side modules (cherry picked from commit 79600da60b46c05ae5842bbe04bb4199d18d74fc) --- crates/fork-instrument/src/call_graph.rs | 24 ++++ crates/fork-instrument/tests/call_graph.rs | 37 +++++ host/src/dylink.ts | 109 ++++++++++++++- host/src/worker-main.ts | 92 ++++++++++-- .../fork-from-dlopen-side-module-e2e.test.ts | 132 ++++++++++++++++++ 5 files changed, 383 insertions(+), 11 deletions(-) create mode 100644 host/test/fork-from-dlopen-side-module-e2e.test.ts diff --git a/crates/fork-instrument/src/call_graph.rs b/crates/fork-instrument/src/call_graph.rs index 980bf65ca..593de285f 100644 --- a/crates/fork-instrument/src/call_graph.rs +++ b/crates/fork-instrument/src/call_graph.rs @@ -348,6 +348,14 @@ const MAX_INDIRECT_DEPTH: u8 = 2; pub fn reaching_closure(module: &Module, seed: FunctionId) -> HashSet { let profiles = profile_functions(module); let table_targets = table_targets(module, &profiles); + let has_dynamic_linker_imports = module.imports.iter().any(|import| { + import.module == "env" + && matches!(import.kind, ImportKind::Function(_)) + && matches!( + import.name.as_str(), + "__wasm_dlopen" | "__wasm_dlsym" | "__wasm_dlclose" | "__wasm_dlerror" + ) + }); // Reverse direct-call graph: `callee -> set of callers`. let mut reverse_direct: HashMap> = HashMap::new(); @@ -411,6 +419,22 @@ pub fn reaching_closure(module: &Module, seed: FunctionId) -> HashSet = profiles + .iter() + .filter_map(|(caller, profile)| (!profile.indirect.is_empty()).then_some(*caller)) + .collect(); + for caller in dynamic_indirect_roots { + enqueue( + caller, + 1, + &mut best_indirect_depth, + &mut result, + &mut worklist, + ); + } + } + while let Some((g, indirect_depth)) = worklist.pop_front() { // (2) Direct-reverse: who calls g directly? if let Some(callers) = reverse_direct.get(&g) { diff --git a/crates/fork-instrument/tests/call_graph.rs b/crates/fork-instrument/tests/call_graph.rs index 4062105e4..c3159bb6e 100644 --- a/crates/fork-instrument/tests/call_graph.rs +++ b/crates/fork-instrument/tests/call_graph.rs @@ -388,6 +388,43 @@ fn passive_element_with_table_init_is_followed() { ); } +#[test] +fn dynamic_linker_indirect_call_is_conservative_fork_boundary() { + // A dlopen/dlsym-capable main module can have side-module functions + // inserted into its indirect function table by the host after static + // analysis. If such a side-module function calls fork(), the main-module + // frame above the call_indirect must be serializable even though the + // side-module target is not present in any static element segment. + let wat = r#" + (module + (import "kernel" "kernel_fork" (func $fork (result i32))) + (import "env" "__wasm_dlsym" (func $dlsym (param i32 i32 i32) (result i32))) + (type $side_fn_ty (func (result i32))) + (table $t 1 funcref) + (func $dispatch_side_callback (export "dispatch_side_callback") (result i32) + i32.const 0 + call_indirect $t (type $side_fn_ty)) + (func $parent_frame (export "parent_frame") (result i32) + call $dispatch_side_callback) + (func $ordinary (export "ordinary") (result i32) + i32.const 7)) + "#; + let found = discover(wat); + assert!( + found.iter().any(|n| n == "dispatch_side_callback"), + "dynamic-linking call_indirect sites must be instrumented as potential \ + side-module fork boundaries; got {found:?}" + ); + assert!( + found.iter().any(|n| n == "parent_frame"), + "direct callers above a dynamic side-module dispatch must also be saved; got {found:?}" + ); + assert!( + !found.iter().any(|n| n == "ordinary"), + "unrelated dynamic-linking functions without call_indirect should stay out; got {found:?}" + ); +} + #[test] fn indirect_closure_allows_two_hops_but_does_not_cascade_forever() { // Models trampoline-shaped runtimes without allowing unbounded diff --git a/host/src/dylink.ts b/host/src/dylink.ts index f159d2b8e..ef0b613f9 100644 --- a/host/src/dylink.ts +++ b/host/src/dylink.ts @@ -6,6 +6,8 @@ * https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md */ +import { FORK_SAVE_BUFFER_SIZE } from "./process-memory"; + // dylink.0 sub-section types const WASM_DYLINK_MEM_INFO = 1; const WASM_DYLINK_NEEDED = 2; @@ -16,6 +18,14 @@ const WASM_DYLINK_IMPORT_INFO = 4; const WASM_DYLINK_FLAG_TLS = 0x01; const WASM_DYLINK_FLAG_WEAK = 0x02; +const WPK_FORK_EXPORTS = [ + "wpk_fork_unwind_begin", + "wpk_fork_unwind_end", + "wpk_fork_rewind_begin", + "wpk_fork_rewind_end", + "wpk_fork_state", +] as const; + export interface DylinkMetadata { /** Bytes of linear memory this module needs */ memorySize: number; @@ -170,6 +180,21 @@ export interface LoadedSharedLibrary { metadata: DylinkMetadata; /** Path/name of the library */ name: string; + /** Fork save buffer for side modules that can call fork via env.fork. */ + forkBufAddr?: number; +} + +export interface SideModuleForkState { + name: string; + instance: WebAssembly.Instance; + forkBufAddr: number; +} + +export interface SideModuleForkSupport { + /** Mark the side module whose stack is currently unwinding for fork(). */ + setActiveFork: (state: SideModuleForkState) => void; + /** Clear an active side-module fork after rewind reaches env.fork again. */ + clearActiveFork: (state: SideModuleForkState) => void; } /** @@ -193,6 +218,8 @@ export interface DylinkReplayOptions { * the memcpy'd data section encode (memoryBase + offset); using any * other base corrupts pointers. */ memoryBase: number; + /** Side-module fork save buffer copied from the parent, if any. */ + forkBufAddr?: number; } /** @@ -215,6 +242,8 @@ export interface LoadSharedLibraryOptions { got: Map; /** Already-loaded libraries for dedup and dependency resolution */ loadedLibraries: Map; + /** Multi-module fork support for side modules loaded into this process. */ + sideModuleFork?: SideModuleForkSupport; /** Callback to locate and read a library file by name (async version) */ resolveLibrary?: (name: string) => Promise; /** Callback to locate and read a library file by name (sync version) */ @@ -345,6 +374,72 @@ function instantiateSharedLibrary( ? new (WebAssembly as any).Tag({ parameters: ["i32"] }) : undefined; + const module = new WebAssembly.Module(wasmBytes as unknown as BufferSource); + const moduleImports = WebAssembly.Module.imports(module); + const moduleExports = WebAssembly.Module.exports(module); + const importsFork = moduleImports.some((imp) => + imp.module === "env" && imp.name === "fork" && imp.kind === "function" + ); + const hasCompleteForkInstrumentation = + WPK_FORK_EXPORTS.every((name) => + moduleExports.some((exp) => exp.kind === "function" && exp.name === name), + ); + const hasAnyForkInstrumentation = + WPK_FORK_EXPORTS.some((name) => + moduleExports.some((exp) => exp.kind === "function" && exp.name === name), + ); + if (hasAnyForkInstrumentation && !hasCompleteForkInstrumentation) { + const missing = WPK_FORK_EXPORTS.filter((name) => + !moduleExports.some((exp) => exp.kind === "function" && exp.name === name), + ); + throw new Error(`${name}: incomplete wasm-fork-instrument exports; missing ${missing.join(", ")}`); + } + + const sideForkBufAddr = importsFork && hasCompleteForkInstrumentation && options.sideModuleFork + ? (replay?.forkBufAddr + ?? options.allocateMemory?.(FORK_SAVE_BUFFER_SIZE, 16) + ?? 0) + : 0; + let instance: WebAssembly.Instance | null = null; + let sideForkState: SideModuleForkState | null = null; + + const sideModuleForkImport = (): number => { + if (!options.sideModuleFork || sideForkBufAddr === 0 || !instance) { + throw new Error( + `${name}: env.fork reached without complete side-module fork support; ` + + "rebuild the side module with wasm-fork-instrument --entry env.fork", + ); + } + + const mainFork = options.globalSymbols.get("fork"); + if (typeof mainFork !== "function") { + throw new Error(`${name}: env.fork could not resolve main module fork`); + } + + const state = (instance.exports.wpk_fork_state as () => number)(); + if (state === 2) { + (instance.exports.wpk_fork_rewind_end as () => void)(); + const result = Number((mainFork as () => number)()); + if (sideForkState) { + options.sideModuleFork.clearActiveFork(sideForkState); + sideForkState = null; + } + return result; + } + + (instance.exports.wpk_fork_unwind_begin as (addr: number) => void)(sideForkBufAddr); + sideForkState = { name, instance, forkBufAddr: sideForkBufAddr }; + options.sideModuleFork.setActiveFork(sideForkState); + return Number((mainFork as () => number)()); + }; + + const uninstrumentedSideModuleForkImport = (): number => { + throw new Error( + `${name}: env.fork reached from an uninstrumented side module. ` + + "Rebuild the side module with wasm-fork-instrument --entry env.fork.", + ); + }; + // Construct imports const imports: WebAssembly.Imports = { env: new Proxy({} as Record, { @@ -356,6 +451,13 @@ function instantiateSharedLibrary( case "__table_base": return tableBaseGlobal; case "__stack_pointer": return options.stackPointer; case "__c_longjmp": return longjmpTag; + case "fork": + if (importsFork && options.sideModuleFork) { + return hasCompleteForkInstrumentation + ? sideModuleForkImport + : uninstrumentedSideModuleForkImport; + } + break; } const sym = options.globalSymbols.get(prop); if (sym !== undefined) return sym; @@ -364,6 +466,7 @@ function instantiateSharedLibrary( has(_target, prop: string) { if (["memory", "__indirect_function_table", "__memory_base", "__table_base", "__stack_pointer", "__c_longjmp"].includes(prop)) return true; + if (prop === "fork" && importsFork && options.sideModuleFork) return true; return options.globalSymbols.has(prop); }, }), @@ -379,9 +482,8 @@ function instantiateSharedLibrary( }), }; - // Compile and instantiate synchronously - const module = new WebAssembly.Module(wasmBytes as unknown as BufferSource); - const instance = new WebAssembly.Instance(module, imports); + // Instantiate synchronously + instance = new WebAssembly.Instance(module, imports); // Relocate exports: data address globals need memoryBase added const relocatedExports: Record = {}; @@ -448,6 +550,7 @@ function instantiateSharedLibrary( exports: relocatedExports, metadata, name, + forkBufAddr: sideForkBufAddr || undefined, }; options.loadedLibraries.set(name, loaded); diff --git a/host/src/worker-main.ts b/host/src/worker-main.ts index 8a9ad298f..569ebe4ca 100644 --- a/host/src/worker-main.ts +++ b/host/src/worker-main.ts @@ -10,7 +10,7 @@ import type { CentralizedThreadInitMessage, WorkerToHostMessage, } from "./worker-protocol"; -import { DynamicLinker, type LoadedSharedLibrary } from "./dylink"; +import { DynamicLinker, type LoadedSharedLibrary, type SideModuleForkState } from "./dylink"; import { extractAbiVersion } from "./constants"; import { ABI_SYSCALLS, @@ -180,6 +180,10 @@ export interface DlopenSupport { * fork-child path AFTER setupChannelBase and BEFORE the wpk_fork * rewind into _start. */ replayDlopens: () => void; + /** Finish side-module fork unwind after the main module has unwound. */ + completeSideModuleForkUnwind: () => void; + /** Begin side-module fork rewind before re-entering the main module. */ + beginSideModuleForkRewind: () => void; } /** @@ -203,13 +207,18 @@ function buildDlopenImports( ): DlopenSupport { let linker: DynamicLinker | null = null; const loadedLibraries = new Map(); + let activeSideFork: SideModuleForkState | null = null; const decoder = new TextDecoder(); const encoder = new TextEncoder(); const n = (v: number | bigint): number => typeof v === "bigint" ? Number(v) : v; const forkBufAddr = channelOffset - FORK_BUF_SIZE; const headOffset = ptrWidth === 8 ? DLOPEN_HEAD_OFFSET_WASM64 : DLOPEN_HEAD_OFFSET_WASM32; + const sideForkOffset = ptrWidth === 8 + ? DLOPEN_ACTIVE_SIDE_FORK_OFFSET_WASM64 + : DLOPEN_ACTIVE_SIDE_FORK_OFFSET_WASM32; const headSlot = forkBufAddr - headOffset; + const activeSideForkSlot = forkBufAddr - sideForkOffset; const entrySize = ptrWidth === 8 ? DLOPEN_ENTRY_SIZE_WASM64 : DLOPEN_ENTRY_SIZE_WASM32; const readPtr = (view: DataView, addr: number): number => @@ -283,6 +292,21 @@ function buildDlopenImports( globalSymbols, got: new Map(), loadedLibraries, + sideModuleFork: { + setActiveFork: (state) => { + activeSideFork = state; + writePtr(new DataView(memory.buffer), activeSideForkSlot, state.forkBufAddr); + }, + clearActiveFork: (state) => { + if (activeSideFork?.forkBufAddr === state.forkBufAddr) { + activeSideFork = null; + } + const view = new DataView(memory.buffer); + if (readPtr(view, activeSideForkSlot) === state.forkBufAddr) { + writePtr(view, activeSideForkSlot, 0); + } + }, + }, }); return linker; }; @@ -291,7 +315,12 @@ function buildDlopenImports( // entry is one mmap block: struct, then name UTF-8 (padded to 8-byte // alignment), then the side-module wasm bytes. Pointers are absolute // — fork's memcpy preserves the parent's address space. - const persistArchiveEntry = (name: string, bytes: Uint8Array, memoryBase: number): void => { + const persistArchiveEntry = ( + name: string, + bytes: Uint8Array, + memoryBase: number, + sideForkBufAddr: number, + ): void => { const nameBytes = encoder.encode(name); const nameLen = nameBytes.length; const nameAligned = (nameLen + 7) & ~7; @@ -309,6 +338,7 @@ function buildDlopenImports( view.setBigUint64(entry + 24, BigInt(bytesPtr), true); view.setBigUint64(entry + 32, BigInt(bytes.length), true); view.setBigUint64(entry + 40, BigInt(memoryBase), true); + view.setBigUint64(entry + 48, BigInt(sideForkBufAddr), true); } else { view.setUint32(entry + 0, 0, true); view.setUint32(entry + 4, namePtr, true); @@ -316,6 +346,7 @@ function buildDlopenImports( view.setUint32(entry + 12, bytesPtr, true); view.setUint32(entry + 16, bytes.length, true); view.setUint32(entry + 20, memoryBase, true); + view.setUint32(entry + 24, sideForkBufAddr, true); } new Uint8Array(memory.buffer, namePtr, nameLen).set(nameBytes); @@ -349,7 +380,7 @@ function buildDlopenImports( const lk = getLinker(); while (cursor !== 0) { - let next: number, namePtr: number, nameLen: number, bytesPtr: number, bytesLen: number, memoryBase: number; + let next: number, namePtr: number, nameLen: number, bytesPtr: number, bytesLen: number, memoryBase: number, sideForkBufAddr: number; if (ptrWidth === 8) { next = Number(view.getBigUint64(cursor + 0, true)); namePtr = Number(view.getBigUint64(cursor + 8, true)); @@ -357,6 +388,7 @@ function buildDlopenImports( bytesPtr = Number(view.getBigUint64(cursor + 24, true)); bytesLen = Number(view.getBigUint64(cursor + 32, true)); memoryBase = Number(view.getBigUint64(cursor + 40, true)); + sideForkBufAddr = Number(view.getBigUint64(cursor + 48, true)); } else { next = view.getUint32(cursor + 0, true); namePtr = view.getUint32(cursor + 4, true); @@ -364,6 +396,7 @@ function buildDlopenImports( bytesPtr = view.getUint32(cursor + 12, true); bytesLen = view.getUint32(cursor + 16, true); memoryBase = view.getUint32(cursor + 20, true); + sideForkBufAddr = view.getUint32(cursor + 24, true); } // Copy name + bytes out of shared memory before passing to @@ -376,15 +409,54 @@ function buildDlopenImports( const bytesCopy = new Uint8Array(new Uint8Array(memory.buffer, bytesPtr, bytesLen)); // DynamicLinker.dlopenSync returns 0 on error, >0 on success. - const handle = lk.dlopenSync(name, bytesCopy, { memoryBase }); + const handle = lk.dlopenSync(name, bytesCopy, { memoryBase, forkBufAddr: sideForkBufAddr || undefined }); if (handle === 0) { throw new Error(`dlopen(${name}): ${lk.dlerror() || "unknown"}`); } + const loaded = loadedLibraries.get(name); + if (loaded?.forkBufAddr && loaded.forkBufAddr === readPtr(view, activeSideForkSlot)) { + activeSideFork = { name, instance: loaded.instance, forkBufAddr: loaded.forkBufAddr }; + } cursor = next; } }; + const findActiveSideFork = (): SideModuleForkState | null => { + if (activeSideFork) return activeSideFork; + const view = new DataView(memory.buffer); + const activeForkBufAddr = readPtr(view, activeSideForkSlot); + if (activeForkBufAddr === 0) return null; + for (const loaded of loadedLibraries.values()) { + if (loaded.forkBufAddr === activeForkBufAddr) { + activeSideFork = { + name: loaded.name, + instance: loaded.instance, + forkBufAddr: loaded.forkBufAddr, + }; + return activeSideFork; + } + } + return null; + }; + + const completeSideModuleForkUnwind = (): void => { + const sideFork = findActiveSideFork(); + if (!sideFork) return; + const state = (sideFork.instance.exports.wpk_fork_state as (() => number) | undefined)?.(); + if (state === 1) { + (sideFork.instance.exports.wpk_fork_unwind_end as () => void)(); + } + }; + + const beginSideModuleForkRewind = (): void => { + const sideFork = findActiveSideFork(); + if (!sideFork) return; + (sideFork.instance.exports.wpk_fork_rewind_begin as (addr: number) => void)( + sideFork.forkBufAddr, + ); + }; + const imports: Record = { __wasm_dlopen: (bytesPtr: number, bytesLen: number, namePtr: number, nameLen: number): number => { @@ -408,7 +480,7 @@ function buildDlopenImports( if (!loaded) { throw new Error(`__wasm_dlopen(${name}): handle=${handle} but loadedLibraries lookup failed`); } - persistArchiveEntry(name, bytesCopy, loaded.memoryBase); + persistArchiveEntry(name, bytesCopy, loaded.memoryBase, loaded.forkBufAddr ?? 0); } return handle; }, @@ -437,7 +509,7 @@ function buildDlopenImports( }, }; - return { imports, replayDlopens }; + return { imports, replayDlopens, completeSideModuleForkUnwind, beginSideModuleForkRewind }; } /** @@ -696,8 +768,10 @@ const FORK_BUF_SIZE = FORK_SAVE_BUFFER_SIZE; // wpk_fork rewind. const DLOPEN_HEAD_OFFSET_WASM32 = 12; const DLOPEN_HEAD_OFFSET_WASM64 = 24; -const DLOPEN_ENTRY_SIZE_WASM32 = 24; -const DLOPEN_ENTRY_SIZE_WASM64 = 48; +const DLOPEN_ACTIVE_SIDE_FORK_OFFSET_WASM32 = 16; +const DLOPEN_ACTIVE_SIDE_FORK_OFFSET_WASM64 = 32; +const DLOPEN_ENTRY_SIZE_WASM32 = 28; +const DLOPEN_ENTRY_SIZE_WASM64 = 56; const WPK_FORK_EXPORTS = [ "wpk_fork_unwind_begin", @@ -1002,6 +1076,7 @@ export async function centralizedWorkerMain( } replayedForkChildDlopens = true; } + dlopenSupport.beginSideModuleForkRewind(); needsRewind = false; } @@ -1021,6 +1096,7 @@ export async function centralizedWorkerMain( if (forkState === 1) { // Unwind completed (fork) — finalize and send SYS_FORK. unwindEnd(); + dlopenSupport.completeSideModuleForkUnwind(); // Send SYS_FORK through the channel now that memory has the // fork save buffer populated (saved_globals + frames). diff --git a/host/test/fork-from-dlopen-side-module-e2e.test.ts b/host/test/fork-from-dlopen-side-module-e2e.test.ts new file mode 100644 index 000000000..397cc2563 --- /dev/null +++ b/host/test/fork-from-dlopen-side-module-e2e.test.ts @@ -0,0 +1,132 @@ +import { describe, it, expect, beforeAll } from "vitest"; +import { execSync } from "node:child_process"; +import { readFileSync, writeFileSync, mkdirSync, existsSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { tmpdir } from "node:os"; +import { fileURLToPath } from "node:url"; +import { runCentralizedProgram } from "./centralized-test-helper"; +import { NodePlatformIO } from "../src/platform/node"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = join(__dirname, "../.."); +const SYSROOT = join(REPO_ROOT, "sysroot"); +const GLUE_DIR = join(REPO_ROOT, "libc", "glue"); +const LLVM_BIN = process.env.LLVM_BIN; +const CLANG = process.env.CLANG || (LLVM_BIN ? join(LLVM_BIN, "clang") : "clang"); +const WASM_LD = process.env.WASM_LD || (LLVM_BIN ? join(LLVM_BIN, "wasm-ld") : "wasm-ld"); +const FORK_INSTRUMENT_FALLBACK = join(REPO_ROOT, "scripts", "run-wasm-fork-instrument.sh"); +const hasSysroot = existsSync(join(SYSROOT, "lib", "libc.a")); +const hasKernel = existsSync(join(REPO_ROOT, "binaries", "kernel.wasm")) || existsSync(join(REPO_ROOT, "local-binaries", "kernel.wasm")); +const BUILD_DIR = join(tmpdir(), "wasm-fork-from-side-e2e"); + +function instrumentTool(): string { + if (process.env.FORK_INSTRUMENT) return process.env.FORK_INSTRUMENT; + if (process.env.HOST_TARGET) { + const candidate = join(REPO_ROOT, "target", process.env.HOST_TARGET, "release", "wasm-fork-instrument"); + if (existsSync(candidate)) return candidate; + } + try { + const hostTarget = execSync("rustc -vV | awk '/^host:/ {print $2}'", { encoding: "utf8" }).trim(); + const candidate = join(REPO_ROOT, "target", hostTarget, "release", "wasm-fork-instrument"); + if (existsSync(candidate)) return candidate; + } catch { + // Fall back to the wrapper below; it knows how to build the tool. + } + return FORK_INSTRUMENT_FALLBACK; +} + +function buildSharedLib(source: string, name: string, instrumentFork: boolean): string { + const srcPath = join(BUILD_DIR, `${name}.c`); + const objPath = join(BUILD_DIR, `${name}.o`); + const soPath = join(BUILD_DIR, `${name}.so`); + writeFileSync(srcPath, source); + execSync(`${CLANG} --target=wasm32-unknown-unknown -fPIC -O2 -matomics -mbulk-memory -c ${srcPath} -o ${objPath}`, { stdio: "pipe" }); + execSync(`${WASM_LD} --experimental-pic --shared --shared-memory --export-all --allow-undefined -o ${soPath} ${objPath}`, { stdio: "pipe" }); + if (instrumentFork) { + execSync(`${instrumentTool()} ${soPath} -o ${soPath}.instr --entry env.fork`, { stdio: "pipe" }); + execSync(`mv ${soPath}.instr ${soPath}`, { stdio: "pipe" }); + } + return soPath; +} + +function buildMainProgram(source: string, name: string): string { + const srcPath = join(BUILD_DIR, `${name}.c`); + const wasmPath = join(BUILD_DIR, `${name}.wasm`); + writeFileSync(srcPath, source); + const cflags = ["--target=wasm32-unknown-unknown", `--sysroot=${SYSROOT}`, "-nostdlib", "-O2", "-matomics", "-mbulk-memory", "-fno-trapping-math"]; + const linkFlags = [ + join(GLUE_DIR, "channel_syscall.c"), + join(GLUE_DIR, "compiler_rt.c"), + join(GLUE_DIR, "dlopen.c"), + join(SYSROOT, "lib", "crt1.o"), + join(SYSROOT, "lib", "libc.a"), + "-Wl,--entry=_start", + "-Wl,--export=_start", + "-Wl,--export=__heap_base", + "-Wl,--import-memory", + "-Wl,--shared-memory", + "-Wl,--max-memory=1073741824", + "-Wl,--allow-undefined", + "-Wl,--global-base=1114112", + "-Wl,--table-base=3", + "-Wl,--export-table", + "-Wl,--growable-table", + "-Wl,--export=__wasm_init_tls", + "-Wl,--export=__tls_base", + "-Wl,--export=__tls_size", + "-Wl,--export=__tls_align", + "-Wl,--export=__stack_pointer", + "-Wl,--export=__wasm_thread_init", + "-Wl,--export-all", + ]; + execSync(`${CLANG} ${[...cflags, srcPath, ...linkFlags, "-o", wasmPath].join(" ")}`, { stdio: "pipe" }); + execSync(`${instrumentTool()} ${wasmPath} -o ${wasmPath}.instr`, { stdio: "pipe" }); + execSync(`mv ${wasmPath}.instr ${wasmPath}`, { stdio: "pipe" }); + return wasmPath; +} + +describe.skipIf(!hasSysroot || !hasKernel)("fork from dlopened side module", () => { + beforeAll(() => mkdirSync(BUILD_DIR, { recursive: true })); + const io = () => new NodePlatformIO(); + + it("fork called by a side module resumes parent and child at the call site", { timeout: 30_000 }, async () => { + const soPath = buildSharedLib(` + extern int fork(void); + extern void exit(int); + int side_fork(void) { + int pid = fork(); + if (pid == 0) exit(0); + return pid; + } + `, "libforkinside", true); + + const wasmPath = buildMainProgram(` + #include + #include + #include + typedef int (*side_fork_fn)(void); + int main(int argc, char **argv) { + void *lib = dlopen(argv[1], RTLD_NOW); + if (!lib) { fprintf(stderr, "dlopen: %s\\n", dlerror()); return 1; } + side_fork_fn side_fork = (side_fork_fn)dlsym(lib, "side_fork"); + if (!side_fork) { fprintf(stderr, "dlsym: %s\\n", dlerror()); return 1; } + int pid = side_fork(); + if (pid < 0) { fprintf(stderr, "side fork failed: %d\\n", pid); return 1; } + int status = 0; + if (waitpid(pid, &status, 0) != pid) { fprintf(stderr, "waitpid failed\\n"); return 1; } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { fprintf(stderr, "bad child status %d\\n", status); return 1; } + puts("ok"); + return 0; + } + `, "test-fork-from-side"); + + const result = await runCentralizedProgram({ + programPath: wasmPath, + argv: ["fork-from-side-main", soPath], + timeout: 30_000, + io: io(), + }); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("ok"); + }); +}); From fb768e016bc025f0a6b7d7f68384b3d255962941 Mon Sep 17 00:00:00 2001 From: Brandon Payton Date: Fri, 19 Jun 2026 05:29:21 -0400 Subject: [PATCH 7/8] Preserve shared mappings across fork for PHP PHPTs Port the PR #2 shared-memory/fork fixes onto current Automattic/kandelo. Centralized host workers now inherit and synchronize MAP_SHARED and SysV shared memory across fork, wait, detach, and exit boundaries so parent/child processes observe real shared state. Grow exec-state serialization instead of failing at a fixed 64 KiB buffer, keep Node/browser fork paths in parity, refresh the ABI snapshot for ABI 16, and update focused host/kernel fixtures and tests for the port. --- abi/snapshot.json | 13 +- crates/kernel/src/fork.rs | 47 ++ crates/kernel/src/lib.rs | 23 + crates/kernel/src/memory.rs | 6 +- crates/kernel/src/syscalls.rs | 140 ++++-- crates/kernel/src/wasm_api.rs | 9 +- crates/shared/src/lib.rs | 4 +- examples/mmap_shared_anonymous_fork.wasm | Bin 30840 -> 47148 bytes examples/run-example.ts | 6 +- host/src/browser-kernel-worker-entry.ts | 1 + host/src/generated/abi.ts | 2 +- host/src/kernel-worker.ts | 553 ++++++++++++++++------- host/src/node-kernel-worker-entry.ts | 26 ++ host/src/platform/node.ts | 9 + host/test/centralized-test-helper.ts | 1 + host/test/ifhwaddr.test.ts | 1 + host/test/multi-worker.test.ts | 3 + host/test/vfs.test.ts | 14 +- libc/glue/abi_constants.h | 2 +- 19 files changed, 618 insertions(+), 242 deletions(-) diff --git a/abi/snapshot.json b/abi/snapshot.json index 99678a2f2..87ae45000 100644 --- a/abi/snapshot.json +++ b/abi/snapshot.json @@ -1,5 +1,5 @@ { - "abi_version": 15, + "abi_version": 16, "channel_buffers": { "data_offset": 72, "data_size": 65536, @@ -113,7 +113,7 @@ }, "host_adapter": { "manifest": { - "abi_version": 15, + "abi_version": 16, "channel_data_offset": 72, "channel_data_size": 65536, "channel_header_size": 72, @@ -944,11 +944,6 @@ "name": "kernel_madvise", "signature": "(i32,i32,i32) -> (i32)" }, - { - "kind": "func", - "name": "kernel_mark_process_exited", - "signature": "(i32,i32) -> (i32)" - }, { "kind": "func", "name": "kernel_mark_process_signaled", @@ -1097,7 +1092,7 @@ { "kind": "func", "name": "kernel_preadv", - "signature": "(i32,i32,i32,i64) -> (i32)" + "signature": "(i32,i32,i32,i32,i32) -> (i32)" }, { "kind": "func", @@ -1137,7 +1132,7 @@ { "kind": "func", "name": "kernel_pwritev", - "signature": "(i32,i32,i32,i64) -> (i32)" + "signature": "(i32,i32,i32,i32,i32) -> (i32)" }, { "kind": "func", diff --git a/crates/kernel/src/fork.rs b/crates/kernel/src/fork.rs index 8898090d7..e8786ed31 100644 --- a/crates/kernel/src/fork.rs +++ b/crates/kernel/src/fork.rs @@ -45,6 +45,8 @@ const MAX_ENV_VARS: u32 = 65536; const MAX_ARGV: u32 = 65536; const MAX_PATH_LEN: usize = 1048576; // 1 MiB const MAX_STRING_LEN: usize = 1048576; // 1 MiB +const INITIAL_EXEC_STATE_BUFFER_LEN: usize = 64 * 1024; +const MAX_EXEC_STATE_BUFFER_LEN: usize = 4 * 1024 * 1024; // ── Writer helper ─────────────────────────────────────────────────────────── @@ -1502,6 +1504,26 @@ pub fn serialize_exec_state(proc: &Process, buf: &mut [u8]) -> Result Result, Errno> { + let mut len = INITIAL_EXEC_STATE_BUFFER_LEN; + + loop { + let mut buf = Vec::new(); + buf.resize(len, 0u8); + + match serialize_exec_state(proc, &mut buf) { + Ok(written) => { + buf.truncate(written); + return Ok(buf); + } + Err(Errno::ENOMEM) if len < MAX_EXEC_STATE_BUFFER_LEN => { + len = len.saturating_mul(2).min(MAX_EXEC_STATE_BUFFER_LEN); + } + Err(err) => return Err(err), + } + } +} + // ── Exec Deserialize ──────────────────────────────────────────────────────── /// Deserialize process state from an exec buffer. @@ -1931,6 +1953,31 @@ mod tests { assert_eq!(restored.signals.pending, 0); } + #[test] + fn test_exec_state_grows_for_large_environment() { + let mut proc = Process::new(1); + proc.environ.clear(); + for i in 0..1200 { + let mut var = b"KDE_LONG_ENV_".to_vec(); + var.extend_from_slice(i.to_string().as_bytes()); + var.push(b'='); + var.extend(core::iter::repeat(b'x').take(80)); + proc.environ.push(var); + } + + let mut old_limit_buf = vec![0u8; 64 * 1024]; + assert_eq!( + serialize_exec_state(&proc, &mut old_limit_buf), + Err(Errno::ENOMEM), + ); + + let serialized = serialize_exec_state_with_growing_buffer(&proc).unwrap(); + assert!(serialized.len() > 64 * 1024); + + let restored = deserialize_exec_state(&serialized, 1).unwrap(); + assert_eq!(restored.environ, proc.environ); + } + #[test] fn test_exec_state_filters_cloexec_fds() { use wasm_posix_shared::fd_flags::FD_CLOEXEC; diff --git a/crates/kernel/src/lib.rs b/crates/kernel/src/lib.rs index e8f0271bb..5c076b6fb 100644 --- a/crates/kernel/src/lib.rs +++ b/crates/kernel/src/lib.rs @@ -78,6 +78,29 @@ pub fn current_time_secs() -> i64 { } } +// --------------------------------------------------------------------------- +// Kernel mode flag +// --------------------------------------------------------------------------- + +use core::sync::atomic::{AtomicU32, Ordering}; + +/// Kernel operating mode. +/// +/// - Mode 0 (default): Traditional per-process kernel. Blocking syscalls spin +/// or delegate to the host. +/// - Mode 1: Centralized kernel. Blocking syscalls return EAGAIN immediately +/// so the host JS event loop can handle waiting asynchronously. +static KERNEL_MODE: AtomicU32 = AtomicU32::new(0); + +#[inline] +pub fn is_centralized_mode() -> bool { + KERNEL_MODE.load(Ordering::Relaxed) != 0 +} + +pub fn set_kernel_mode(mode: u32) { + KERNEL_MODE.store(mode, Ordering::Relaxed); +} + #[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))] mod wasm { use core::alloc::{GlobalAlloc, Layout}; diff --git a/crates/kernel/src/memory.rs b/crates/kernel/src/memory.rs index 408c4bd03..81535d610 100644 --- a/crates/kernel/src/memory.rs +++ b/crates/kernel/src/memory.rs @@ -285,7 +285,11 @@ impl MemoryManager { if len == 0 { return false; } - let unmap_end = addr.saturating_add(len); + let aligned_len = match len.checked_add(0xFFFF) { + Some(v) => v & !0xFFFF, + None => return false, + }; + let unmap_end = addr.saturating_add(aligned_len); let mut found = false; let mut new_mappings: Vec = Vec::new(); diff --git a/crates/kernel/src/syscalls.rs b/crates/kernel/src/syscalls.rs index 99c23de01..28a48f0bb 100644 --- a/crates/kernel/src/syscalls.rs +++ b/crates/kernel/src/syscalls.rs @@ -8191,8 +8191,13 @@ pub fn sys_connect( if sock.sock_type != SocketType::Stream { return Err(Errno::EOPNOTSUPP); } - let (ip6, port) = parse_sockaddr_in6(addr)?; - if !(is_loopback_addr6(ip6) || is_unspecified_addr6(ip6)) { + let (raw_ip6, port) = parse_sockaddr_in6(addr)?; + let ip6 = if is_unspecified_addr6(raw_ip6) { + loopback_addr6() + } else { + raw_ip6 + }; + if !is_loopback_addr6(ip6) { return Err(Errno::EADDRNOTAVAIL); } @@ -8215,61 +8220,100 @@ pub fn sys_connect( } } } - let listener_idx = listener_idx.ok_or(Errno::ECONNREFUSED)?; + if let Some(listener_idx) = listener_idx { + let (pipe_a_idx, pipe_b_idx) = + proc.alloc_pipe_pair(PipeBuffer::new(65536), PipeBuffer::new(65536)); - let (pipe_a_idx, pipe_b_idx) = - proc.alloc_pipe_pair(PipeBuffer::new(65536), PipeBuffer::new(65536)); - - let client_sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; - let client_addr6 = if is_unspecified_addr6(client_sock.bind_addr6) { - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] - } else { - client_sock.bind_addr6 - }; - let mut client_port = client_sock.bind_port; - if client_port == 0 { - client_port = proc.next_ephemeral_port; - proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); - if proc.next_ephemeral_port == 0 { - proc.next_ephemeral_port = 49152; + let client_sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + let client_addr6 = if is_unspecified_addr6(client_sock.bind_addr6) { + loopback_addr6() + } else { + client_sock.bind_addr6 + }; + let mut client_port = client_sock.bind_port; + if client_port == 0 { + client_port = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } } - } - let listener = proc.sockets.get(listener_idx).ok_or(Errno::EBADF)?; - let mut accepted_sock = SocketInfo::new(SocketDomain::Inet6, SocketType::Stream, 0); - accepted_sock.state = SocketState::Connected; - accepted_sock.recv_buf_idx = Some(pipe_a_idx); - accepted_sock.send_buf_idx = Some(pipe_b_idx); - accepted_sock.bind_addr6 = listener.bind_addr6; - accepted_sock.bind_port = listener.bind_port; - accepted_sock.peer_addr6 = client_addr6; - accepted_sock.peer_port = client_port; - let accepted_idx = proc.sockets.alloc(accepted_sock); + let listener = proc.sockets.get(listener_idx).ok_or(Errno::EBADF)?; + let mut accepted_sock = SocketInfo::new(SocketDomain::Inet6, SocketType::Stream, 0); + accepted_sock.state = SocketState::Connected; + accepted_sock.recv_buf_idx = Some(pipe_a_idx); + accepted_sock.send_buf_idx = Some(pipe_b_idx); + accepted_sock.bind_addr6 = listener.bind_addr6; + accepted_sock.bind_port = listener.bind_port; + accepted_sock.peer_addr6 = client_addr6; + accepted_sock.peer_port = client_port; + let accepted_idx = proc.sockets.alloc(accepted_sock); + + let listener = proc.sockets.get_mut(listener_idx).ok_or(Errno::EBADF)?; + listener.listen_backlog.push(accepted_idx); + let accept_wake_idx = listener.accept_wake_idx; - let listener = proc.sockets.get_mut(listener_idx).ok_or(Errno::EBADF)?; - listener.listen_backlog.push(accepted_idx); - let accept_wake_idx = listener.accept_wake_idx; + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.send_buf_idx = Some(pipe_a_idx); + client.recv_buf_idx = Some(pipe_b_idx); + client.state = SocketState::Connected; + client.peer_addr6 = ip6; + client.peer_port = port; + client.peer_idx = Some(accepted_idx); + if client.bind_port == 0 { + client.bind_port = client_port; + client.bind_addr6 = loopback_addr6(); + } - let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; - client.send_buf_idx = Some(pipe_a_idx); - client.recv_buf_idx = Some(pipe_b_idx); - client.state = SocketState::Connected; - client.peer_addr6 = ip6; - client.peer_port = port; - client.peer_idx = Some(accepted_idx); - if client.bind_port == 0 { - client.bind_port = client_port; - client.bind_addr6 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]; - } + let accepted = proc.sockets.get_mut(accepted_idx).ok_or(Errno::EBADF)?; + accepted.peer_idx = Some(sock_idx); - let accepted = proc.sockets.get_mut(accepted_idx).ok_or(Errno::EBADF)?; - accepted.peer_idx = Some(sock_idx); + if let Some(idx) = accept_wake_idx { + crate::wakeup::push_accept(idx); + } - if let Some(idx) = accept_wake_idx { - crate::wakeup::push_accept(idx); + return Ok(()); } - return Ok(()); + let net_handle = sock_idx as i32; + if sock.state != SocketState::Connecting { + host.host_net_connect(net_handle, &[127, 0, 0, 1], port)?; + let client_sock = proc.sockets.get(sock_idx).ok_or(Errno::EBADF)?; + let mut client_port = client_sock.bind_port; + if client_port == 0 { + client_port = proc.next_ephemeral_port; + proc.next_ephemeral_port = proc.next_ephemeral_port.wrapping_add(1); + if proc.next_ephemeral_port == 0 { + proc.next_ephemeral_port = 49152; + } + } + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.state = SocketState::Connecting; + client.host_net_handle = Some(net_handle); + if client.bind_port == 0 { + client.bind_port = client_port; + client.bind_addr6 = loopback_addr6(); + } + } + return match host.host_net_connect_status(net_handle) { + Ok(()) => { + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.state = SocketState::Connected; + client.host_net_handle = Some(net_handle); + client.peer_addr6 = ip6; + client.peer_port = port; + client.connect_error = 0; + Ok(()) + } + Err(Errno::EAGAIN) => Err(Errno::EAGAIN), + Err(e) => { + let client = proc.sockets.get_mut(sock_idx).ok_or(Errno::EBADF)?; + client.state = SocketState::Closed; + client.connect_error = e as u32; + Err(e) + } + }; } // Parse sockaddr_in: family(2) + port(2 big-endian) + addr(4) diff --git a/crates/kernel/src/wasm_api.rs b/crates/kernel/src/wasm_api.rs index b7fec50fa..79d210743 100644 --- a/crates/kernel/src/wasm_api.rs +++ b/crates/kernel/src/wasm_api.rs @@ -2318,16 +2318,15 @@ pub extern "C" fn kernel_exec_setup(pid: u32) -> i32 { None => return -(Errno::ESRCH as i32), }; - // Serialize as exec state (signal handler reset, etc.) + // Serialize as exec state (signal handler reset, etc.). // CLOEXEC fds were already closed above, so serialization just preserves what's left. - let mut buf = alloc::vec![0u8; 64 * 1024]; - let written = match crate::fork::serialize_exec_state(proc, &mut buf) { - Ok(n) => n, + let buf = match crate::fork::serialize_exec_state_with_growing_buffer(proc) { + Ok(buf) => buf, Err(e) => return -(e as i32), }; // Deserialize back to replace the process with exec-sanitized version - match crate::fork::deserialize_exec_state(&buf[..written], pid) { + match crate::fork::deserialize_exec_state(&buf, pid) { Ok(new_proc) => { table.get_mut(pid).map(|p| { *p = new_proc; diff --git a/crates/shared/src/lib.rs b/crates/shared/src/lib.rs index 950098fd4..2b0f12907 100644 --- a/crates/shared/src/lib.rs +++ b/crates/shared/src/lib.rs @@ -24,7 +24,9 @@ pub mod host_abi; /// with a wasm-declared reserved thread-slot count. /// 15: remove the obsolete `kernel_set_mode` export; the kernel is always /// the shared point of contact for all programs. -pub const ABI_VERSION: u32 = 15; +/// 16: remove the obsolete `kernel_mark_process_exited` export and split +/// `kernel_preadv`/`kernel_pwritev` offsets into explicit lo/hi i32 args. +pub const ABI_VERSION: u32 = 16; /// Syscall numbers for the POSIX kernel interface. #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/examples/mmap_shared_anonymous_fork.wasm b/examples/mmap_shared_anonymous_fork.wasm index 76b13720f99d967a89d20757e9a1c0ca27bf1e43..1f4b9a7394b4c53820f014d0d445a7f8f937478e 100644 GIT binary patch literal 47148 zcmeIbdz{|YRsZ|FJhz$WnVBaSl8_0o_md$cB!SS<2?bj6ostkPg|uMBS`9-oz+5tu z%uFEIG&7ZeQK{musHpfe`oj``CN?T6KczNS)Yzh;qN1Wk#fp|Hz7DU~!Pk#t&ik|W z_jzU}nIN9u<(xlGT4t^9zO23X+H0@9_S%;x>YvyZM^P02K)mFxD7q_(r|ycUBK`wO zroi|EnobGVUp@)|z6|hL)F`7Y{P0VP8$ zLkLN}kk(7X1LLCuBh>)5-!nEo9L0HHR~~rpz~uJ+@q2f)BI0SO}k$b8x~cv3|LRk5SV0%~^uK&x6!{txUMoQ#qb_H}ND1^uJ+&Gx(dCkE1~SfQm| z1G~n?A57BX^mLqFdrh@iURY0}QYlK>lca5Nxs()&y`yP?oDVP%whxXDPAW(Af%CWXv2$>I zUgd^A+TOo&=XkWGq{@y@MoSmw+4GF5LzJEO!0zGDGkZrLpeEbz9=LaKH0oV+ zX86G9&ZuuD_4vT7)K|@<&cl`ZL!B)e)L?QTx}g%UYwL`oEUi|$wr;)fgDbDNtQM#3 zKlEG4HJ^PVPQDsX6%VW|X33Vqrj@1DNtC@*9`;|1SC-rtFCw;!gym z=6@|9bsv%}_ThV(Pb}pMpBi+&V6exR4X*af46gIe;0EsvUg;YKuktGmUh6M1xXG_D zc)dT*;8tHWc#BsBS0~&2LW6hsc7u2Nios9#n!&q#o57uaxxxE<)!>NlFu2<Wej@hWhCCVj3oOy$*uNeU9f|!? zi#rRkVt=tAFUEeAAuq-L5<^~& z{c1yAiTxTwPR0IGLtc&jT0>ro{W?QlkNtW>-iZBWhP)a3%ME!e_8Sa&JN8!?GM)JM z7;-T2?=|F5;;%I1(Zs*ckjE0=Yslk?-)P7aiNDH_ClmjELk=hYYD1n%{56IgN&K~j z98LTO3^|_o4;u1J;;%F0*~D)$Wv%8=s)f54Du3VzCvXA6EhjH~Ag{?qvgEBJ2@*z*PdP{2+W z{K0^|Q1A~2?8SnABw#NU{C5QG<$^yHuvZHHI|FvA;J+(iuNM4w2kf7mP7yM@e z_C~>fPr%+R`0owaTLu4Ez}_zS?+e&;(f_A_9W46K2JBGLKOV40i~joq_E^#XK)@a^ z`X3C~6Gi_-z@9An9}3vvqW|H5JyrBS60jph|75_97X6O~?0C`t=YTy^^gkA`XN&%D zz@97m9}n1xqW_72Jzw{8&1?))Kp9;J>t3t*(r-6W0wl%000w zXD8Ajte0~3I?w@s28qv(x-5Pu$c|(bte) zcs3hXy*nAHzl*BA6RA+YGWBa)Hk;I}mUf@BCTY5ScAhiD{QUXyr7G~knOw8VSv+6* zi{5=wHMCMX8g9%;R-^r(+ zs$OR#{i+%oZ|PXK9P%BPSbcmYoANd49IuPwMq9RlLbJA;J0eG=lC0+jVHx_Rq7{h=a+hZHAs4%(z9dT&PS51wt{I*xyw(ce&F7Zl^}bhS;v5A_HCprX&4{5uvKOW z8?FJB?NsTf2fL&Nyqa4}c6atc9>p79{Xon3b z!p$9M!KQM&m;rnG^e6W;I$br4-96pDPVK_~W~K@%-&tS+OEqD%;%A$EN|7dMK}x?W z-E?MTP|kVYxs>B%gH07=nhII1d?ONSBB7)NF|v9BUf#Y(YBTHFlcig_#p>?HqU_hA z;m)g;xDK~u$r3m7uTyB+{9fWJMa35hXp0MpbL}otyoA5z%4^^+o~Z1xO3hSQu~~rt zU8S=U#kBK-#nK1i^y-A}Sg5Z0z?`pZTVF>vrcv#h*5@^GRJ*$Mb#)Zg-rxEXnX6h~ zmT_Y>tGG7QXywC5qpQo_t!HmVev#FeYT_)jN;ke1Z$pO8^Cc-?w}{CP;p8*j${5|lgr)L@>UGQVETJv>tQHn)X@NL)vUyq+QTPEjkkzv_BijHg_8O{mFe9+`h z_NrxEKwm~%_jGKani_hp?iO_WPHH;S?vHjv2qT(@BF<}+n7Y>RK?S$D^I#W>0Pz*S z)QF-~d6`a4L047l*P#>Z@6~*6!pUHHw#^m0tD+MWo5_=@xZAoLwFZK8gReKmoF=67 ze&nk9Q)I-+c#xihpQgK+KZYA!mWjm7&K1iMyDr&X73IL9V-A{?3bXDUsnS`g(sQJe z)u9Sxg=S(kkNT)y$SdW-zff}Iy0QW)7DYe(?FTDiil`1_V`bBy{`Q0W_5>iKQ?m$U zNiVV~W*!}@Mo6=hGqhwm>6lfNPVH7Awn~^Qio>oZ#e)Hj?JDgNWLxu5$I8IEj+M2w zvnBeY9SI4GHFd6pTk1nhRiMLO#UP4Y-LLhR`Za#FzohCeajV@Lcd1)jKOf<58m;SV z1;}PmZ}N7+Z1A=CgUMS$%Gp~9Gb00qnLB({F=|D3e_a4?l<_${bhc=UsrYO+>=AkAX7y0-4_xLNS?h5xF_g>8Zv{l8mtnr$Xz$zmMf%Sx%D06ngU*$LYUjIIS zWz}8j-sgJVMt2qM)NV~!LtMh7qcPdRm@L*-R$Un7jpCjT{1$6cT+Jv~8{pc~u#S5@ z#WF|dTzBYcQ%$C(q^hQ;9@YfJmPhu(72loP>NDDIN%o_y;OuZE`;lDZE$L=&WXa~L zzaS9S2tR%Lx`)^w2>}}pJ&giDHrU&AJxD^yb=+oRa^eF+8Z|opYn) zETN>(V3f=V-AN;M@*Rm?5rj^)qB4=c#$WB3-rw)8cGn({ znq`s@XdqCL|DgYX$C$X*eZYMX5~UW2OIf!@8p|ve_u%_vc)8zb)^5rb&2X93+OnEt zor9D>IP;tQbyatr+vG9`ms^C_EH}DU$Zp1yDi;1`lkS;=w<4ml3A5?8>}T3A85~Rf z?(F;8vY*qeRKjizWuFrkQT~A?^R$;59La^?CudeIk<5 z{&%{*U0vV48IP#y{g>yeH{{RZ&&VuFE{$e)Z1!I^jD1%JpbJgA@OVbcBTbbMPL*VIVOdPWD)nnvvN)A5H32T+e07_;NCLq+MzqCSU@Jwo%c zf6gNgRvWL!p9-23;}w(3Xz0&3bU)qDg3cA6{X*lPni}wQS<0DK9>+dvPYyU@t~}KE zXX3j)QrOh24AsriQ=rh_)~$VQ(f#N#Gi5c)_C*BtnPZ1pepVTG68D+6PM_`~$AhY& zw!&4CC|vjOrylA1x6pj6jnSh9pH`0jB&|BWHaw!lg+m8hb-HeUUWaQRsj|YscK9bO z^a_nR-0XDq=y4`{&5frWIUb z?wFy=%t<>j7)@Sf2ui)=t!cwVw_)3b_Q5!8cjtFQWN~m=kgGJDC41fCo6t+vNQj%| zr|wf5Al>EKl_+Z)!3Q|2L64trLh3^ zMK|mbpOA-~B>HjZ)(K8;frcx|n$dvvHHVfd1-M zLF?uFMh65%&fh6hk@@Gdo2YA5zB4h-B&UpZlTS!}%2{WM;Fyn%{X+?3iZN7QB~j}{ zlq!g#nH=$^af9rwMpM`5OuAqw? zcarq~iSSuRNyxN7K(HYrfm6+3Q3)9-HrkNwMklSw3l9o}TG2ly0t@I{78I2|ez7U8 zDr1!~Tf6VdBanA7!;7Y&iDZs4_R&(RQqk2I z$2CN^s%g@mnARGG+s*x%xBuDYAu zE$+kEs@0aQy5?Q4bhi21{H;}YtGmr@gG8-G;$p3Iq=U2|dBrOC%DF1np_X1ECWckc z?f#?wBUSei_fdB{L_1nUd(N@&=(KJy3UtGCL2HgVov=Zl^LE0oTh)I3TvdZ|oZ!$u zdk;0UDC_!GJO9Lp{mDBLQ;#yawbmRT^WWg_sJc7cH@J_{*v?jCFL~FU@J;?s|BY4m zjqXnOO_1nnkyuG5T-Z^P{^WmMH+MHW{X8wHtC1H|uOQ#9Diu{k%b5E~l}(w5wX`)? zY5S~EsxgJuwoxkk1^dA?w<`~2zm{rym4AFsNPyKi=% zfLrxh+(KdCuN=ZV*oDp0O8s_t9dx4P}n zUeKcLm(N3Op{T7CHEbejv>NT!j#-!S?fd;*Rd<)`cXz{+g)N>m zB;cgde2}xfO=c27H%X>gOO{OKvw(J(UtO3f;sSr(2pd(iKCxjV^J6X| z8ge#lTZ-`$#`e|oN(Em6(XQ?#u_k^Hw+Lb}_J`O&Hybz^Qf zWh`lxae)b`ERCA1w;^msv*m4jR_2+6vz9F!$`&GOd;O%J@Z)|@)$MWPZo*Bvy$I*h z7P<4AHAT8EF#ChT_xT6>{Z)6rd%*3Z@MW#k%UW7vju4!0(_Ob(VcylVraO68=b!F= z&)D;O=VA|(<75Sd&(=8>b%Dxk@%s0T*zdn1F&j>^rn^u2Z}SgU-GlDi+$U-4`K`8g zzw7Dlfd7==Uv>N4r`!QZEN_umiagM8{bz>gLKzf`vgMOlo>|)->~oFJY&r6rY|2MVllrUBEHPVSspNN9*;h*dv5XrmIPxHhwWbkxx79)6d!kS9i@s-I4MWR_cad$w@ zqBSYk3P`BwvLU6GF}R@1D9#q4>W$c``HA5WBmxqrf;X_aoXg9Cw;`Idtt5K+tmxw* z`h-QFJS+Neh~~sFxt~2NTBl8jIBd~J&WhGa7NQ@s=qJvK)(IG*r!D%>nbB*~eK~h_ zi#Z&RphI8|Y^kl+@|BeiOx`ris?+>A&2{Y5!-4DOpyC{lb%6>Z`I>ZVShZd?YuWp7 z?yKCuiZ|D@I<;eDb5g8yGad1Pz+L)t-%Jr}(mV6~+q7avh=P-~O7&zh%wE^bqx5G_ z!Wd9D+9ZsjG*{_=F?B21Okv@Xj5F8dM*(d8vVgCyq4}3EwkeMkMYrr>GBKqds?aDV zQ#c~bAu!wJTB2CwF!);yk+W6oo$G6TnmBW|#^#9CQcoLG&5_V_wwB~mo#kvTHJz;% zG0WMih%=q7)UJw`;pr;=>ccCC==1!HL+O{kAU|vo7TIU^QRIU-g`d{s zqY9}CXZU4v4Fa#ZG)B*otSP|LQx65!4;{lzdr+D2(`qn)wVboB=gwJ9wgpK=ES+R? zwFF;CU4ND}+Y^)zAyK|_P-)Puwl(0}sk*I=ISe9uo;9?bL-_JGFYW3bxR83z^{ZQ5hUB{J{!V7Ebc#k43=5R}gD?3&9aO@t5-FJxe1ufEx#h!V*85df& z7@bOU8O?UpOs8lWJ%6YCl~wSs&Q(Du$B7;0KFgP3Q6>tl*8YVN`-^uZrY6kpl<$iD zcgEa9s=7n5`_9;XmzsQGtI12>b+>#b_K(K?yCL-5v3oRjpAn&pT7(wO?v_8aivHob zD(ckg^YV(agl?y69*eo=#GR+A`<~c+Z|oiudDkMp@El#^t!s=HLssJ=nsG5{b=2}M zH8)fL*6Q}R=c*e7<75eSJKNH;D3gL#kNlPq`>l5*rtY2HBcF}Avc>(as{6j!ae3>r zYN>Cv^t^Z7BR>#xyNjz{RmXKN?s@%y2sK)S7R>IEm#m^MovWfN)#^p6=mMM~5x2p( z8pb`bs{6s%aY^ilM0#b5bp0H?(PO<~bQmbDJ?@!-a`y3tX1DyARq)TwRY54n$>Mpt z#iC3CTHW#`Ble|tB&I%|-7Q?g{gIfKaU-EX}c_v?ODXMc$9C-L_Gr z`CtC!GJ2=x7O+#mUIpiM@Xxm2+)i$dYo{HU1iqIu3Lbcgr<^`+h z3+JlnYPFhO7;VGU7&Av=|8(p>2kFnn?&;Va5$V+}($#Z}nKjliMu(0GKH53mf)+cY zTWac-+GhVZR>8kHR|TOQC%VdXwkoqI6Nc89`LYrF@;ee!A2VhoEY>YQ9djF#tC>~z zQ?dK6vHNK?c}=Uyo$tC^ekSJPCbu}N?r7|e#qMWB=+YLU_W9TQYmIdssuW{mMvyn? z$l4r1u8L#T&D;uH+i(@Yvv6tVGnDKZP$9^{E4k{N?tM2o$ zV{Q4fYUtWlLp$Cz$DWC~uFBojs^k7D7gwJVp>-`nZL&nH~s^ju2w_blqq}R7dS5!yo5RT5IL4$CJ--II9Anbc`w#hU|Q@#JLRpEEf zRfWq`h4y(x)uOC+Eyn+`5&PqJB*uaFIimW@F;{fCuUmE8+2ykCFRM|PwHno?MyblU zAadrQi3~Qh`G(n)!wS(dN22SzaT=Fw+I!NvmC0*(fSo_@vjX|=XxNfK? zb_=_z_Fe{WWpL#@>(C}J=&$lo_cUeZ=$y_T-uxH zjSE-k<Eg zfamol*9&s6M~@#kHn8=;fvyAS5d+uWo9Lke9i-4>1g_Sb=)nQk)|=>20k^z2*(q4< zP4qZ`qiK2oz%A%abjja!_9nW>@9MpYF6+BRy@@X5v!dE4Hwai)Z-R*m@^ykRFv0Lu z+f8=Y-J4*A5OP6pVt3J5G;)OAf|m9sM+~{JH$i)lNQZYATVVeTMt_4X?oCb@a(-{} zydgck$w@;l?oD1WWL0nSq9K>`CU(==t?o@;wvaWwiQQFpm-Z&7EM#qO@~R>0dXv`- zSs7WiYx4e-a1t?Q-0kqkkUQXyA>ROh4EY%RG2|QJk0E!$A49$g z{uuIc9UvE-Z-zgHd;knw zhTH>x47nHn7;+!{F=P<_7{a-wQ-*L1@l``c;Ey4@>?{&h9fdy@G6sJP*$saT*#mzJ z8HYcnGA7`UA(QaOkiGE7ko)0}ArHVGL-xTRLmq@bhI||RG31l*$B_N-$B<9KA43kn zA48_#k0H~jqh}5KG}v>7eLL6*!yW>A-mrsUCk=ZT>;=Of0ejJ~?*MzrutQ)k8}^-G zuNd}SV5bcGZm?Gkdlc+7!#)G{x?$e~_J(2K3-+dAkAb~q*!O|GZPFu)|=_8usI0&l&a;U?&XwFJR9b_7vDj!+sL%1;aiE_M%}=gS}+f5wMpH z`zf$j4EwKOrwsdPuvZN`3ig^|$G~1U>}SB@CB77VK@qehzF}?O|TP&{TA5shMfdEY1nUryJd(^N$1AEM{KL>lVSf#F z)Udw+J8szDf<0r{t6_IF^<8TM7M6Nddg*z<);qfrQZeHsGO!6KVU>|RJ0QU3T z765nWazR{{-GYZ=9_;|)D7!!qr`CmnJo$BzAU+i*h_As59*%uOkR|C#L6(p`f=6S& zMv$e#rGgmyYXzT;{W?Jm(Di~R^d=YZc|FMmJQ@29f-mSnF5rtX?{NWNiv4@feB9*X z&PD9%pZPD94L--7XPuDvqFGjvng?BNAGvES8E-eUG)VE7s-LDYH~BL&=a$}`DpdBIO4*2VN-q<0t1~L3_F?^ zX9sj%_8$({d~{}+T-nuyXD$YJwAr0mZU>dqQmMjik?9H~3`s!B3Mqh;bRQW<4XB+t zj9fvN2soZR9lu@GSA2DJBFVH#(>IMNuh?VtYxGb-obt$lz8&X4%WMEO61H)YK|9ux zPz{&tX(ZaR%0Cz*Y*K7FBcM}TL1%@g952@1)=XB;d7hcII8UO|oJ4Y5CE9&sn`FpG zETFjPrHlE~+Ct&kglhH~-W=d^_p$o>>l>HrQvb5$ws}>y3;z1EV(1tva*%5qJj;;9 zAMQSOps#+YaizO5r2KgI(=afFsr6(kd*-#;YR>f7U7;9`aMYWkVOE^IR~uf62uCs$ z5fjgq-^lUiADwPnsB}APDCw+5MD%q1mTu@LIuL^#&<4-OP63J0dW}ptl>Hic4c3^T zo$;2)#jIOdI##pq3b1^BowXrm_n zj`gK|zHh32fz@bgOLu)^wUJ~~Tl}Re_|ka`mMF%F11+dLl1c4()q{Lsb|5^ne>>yXWeQf^@1 zE6Kj3(z2+&N#c;|Nb@a-N1DA5ams)*c6c*u^#->Ma~^RxWGh!LsmZ$eYhstzy9I9Dw!< zIW&ulYN1<1DdlD<-1M&D=g5v#{Nk!xTz6Te8Py4Ae;v-o>c_TbXh-PB%T<-j=dVh; z8Y~}F_WfNlvg4fB^)Q)%*K zs;`z+-7-q6&E$^l@s-(qb%lssF+VXo{A#{Ed7c zQ99F3U&Zj}dHU)-D&;-%mtu!i%~xcl)bmmfiy`$Pf1$s?uh3zt3*3d2(%CG9Q;qg2 z%pX+zMOAkZebv>BD%Y>fE_cZ>7ayx%UEe64y;l`@@B9_8gP8JsWp7qIqr<_odW#mt z=&tckwRub$OcqW~<}%4fR~4O~>TCvzD$$Du)`Tc;!pn%8@9`@=`^^ov()CdPh0WYa zmM}!69-fz2*?_G!eN(I4sw_2|&EXos5_9-NFtg+h!J-N-sZ|a$5IaeZC`=;xp)h^q zhQh*;mkL8h&LoUH`I0bO+?5+stP^>Vt^(jVx)#tb|I;eDjjjiv8@B>*9^C@KXS5A~ zW9SY5o})VfIFvpCz%O+d05{T30N$PZ064Km0Jx2I18@vY0`Q3K1K{`B55P~wRJ%$Z zrGtX_zYYoFSb9_tC(>hr_{VTPt&$%N&(kXT(QrJik{|7`;7UwxLA*#u1aS}@6}%K< zTM+lqGlJ_dv;}bvJtv57=!77yq2~qh44o9jG4z5Uej$8NtK=49w+>S`dpXW+>_6>_ z&zmwSYQDOADjN2SNksD_w^$bTB7q8)E1-tu3Xp{jkcAD5 z*i?B7+#;?*C&Av_baqV2AVxm*08z1^8^AmOvmSr5LM(i7qde^58~suZuBG!0u0?1g zS)7hD{f;4`c6R!X);z@KS?7W>vz1>&b|&mp{{1tm5~kT^WwtS#J7UKZO0j5ijDy^q zG)^};yK`c%x@7HJlm>fDhqQ-e3MI`-6m`qYYQiL-2}ip}yqe(_swQ!>CJA>x>~W}w zdug63>QtOHVX-gEMk%S9jt83Uc?&zcRJLOgvd46qPRHzY+76u{qpXlWYr(}*e8ozM z?WuEjNl3}@7Nz9s=GiH+@#M}=L&`LyIr(DJi+Iq|!0KL=#YuVWo?XWrg^H zF&W*JhBJn=q?pxp6RId14DHS5FxtCrkBsp?)tV&Qh=?0j*@!YS?aZf?bODl5`J|Us z3k|wsRwhh9vqGCh^dgorw!W9?d)dr4(~C$0Agw3NqO2=FUY}KTpm>J3tXop00?4wO z%QG%YfMKCgS+Mq#K-{ndNuJ>HD7uEuD(GmDD5wi8LCiyMQv1dhrzVM5b4db!6ZXAg z_7*5c=9WNKzhAv1tO6$Cv;Km|L%t0mEx{AEcZM2NP0vuMdt?+>vdScxmfep-5lV#8 za}|q%)TFD-M8U>#R+~3hO;PoEa+TCUoLN_Y@<&{>YKXzQq($ut8XZ(z%wP#4*)e9k zbFJ&Db$#Ai6IGdsnp5k!x#rY*cCNX#UT{{e)ngLroc|i=R%^!FJJ))dYF(MP)_WES9y>!xx6hY5?ZVIsoR)1^`COl>jqg7RzY4mM@GJuG7kBxgLPgvK4^Qati>X zWg7saY&NypJU9sejb|l5A(g8dbXqyW5Tp>T>$%`j6dFG>&nClKP z(Ix^Nd^O(;kO1m5BXvnyf!fUE0Rc1;J8*iQ1nqBc3fusax~c`i(dJ1>UCHloT_54u z3py(9BXu`-({=YIe~JJ)&BW0`UCHE5B}En~qIpU81gA>Ft&O|{onmRCWolA8sK>p^ zoIjzghx)F2#1}Y`8;XgsK@!R?Yoe<5rH{C@?*kkGOq# z{z3W>$3_af5xHK$f#QyrjT1c&yP*y$hsjKO|F~H!F`F zqz0PbsNAg_wil^SPciXPZmrKwU?&Hd>^bhkj&Z1AMjN1bMh(lY8dh61tc4o3K{n6L z1q{nQQ$uccu+pW1IB%L2tXTzXRIu&X!7B1r#KwT95(GF1=l84o_T!+b-f?WdRG&>w zM0`BNORPxoab1~mHJ89QSN+(rPtkgM{!>1YvLv})ZxDwu0hHp-4o!H3TlzaJJ;9`R zU0Ul`T(Xg)ygU-Qk;A-XByB;mBLXIrXiQ$))ZDGd1UV(`Djfa=V;oHJ3Mr2a^5CVN zfzBL^4JVE3cOc&gNPS##)8`N?dlidD5XE-!Njn}qAv7(K$<2sO;Z5+0wttR z>lzI6m~I^cq%~0^4{b9+XK45F@J6!j*nX5nkez8oG8LlK6l>(n4yV0*Y631xCF_OE z4iYs*&W_xhOxx&HYt_oWh{a7=rV~x&bt@_n+E~ak!jBy?wIKD3#>}oW?yI>f7`+9D zjlX4`rib&10`tk_i(Fc$wk8&aqpEWd-UKJ8X<_VH*RF9zi}W5!1)CUW|L1E-I@NXn ze#yUW&vLcV+SUi-)};?OIl}7-4^eK~nv}#^m^*8h3UdoA;c?PNIZR5VD2P+K%U6K! zZ8m$EZ#P9j@R_Hi7u)BsF@bYsQx8T!l9lHOlzGRE0be<|O%E!ck+o@ew^Br^2R&-V z!wqy91;*Ccu7?`*&;e=_8qZv&!Rii4dMCrrc1+Mm@nc6 z|2vRFu3|Nt!wlQz3f+xFG`n17RyZ#JbT`_X)u-BL^G2TO`6AmX>xL+RyY#_ljglsL7%00G!D4 zg`#4Y9H8EL5_sVrfe)_C1frgEo%Ze7tBBkRX31T}TP?ym)mS}a6)eueb4fQYkxi z8ZX?rW{YxbwxBwiEy|iLf`l?-gxLaHp1Rv?q1VLz$_TdP)U!M`MW#9940UCi4kL<5 zqY$RZLiT-ATX~pMT&C0W8IjonrXTV=Kw zUcO5wQ1K0cnqMg(-z7l4OMrZr0R2iBz|mKL9u_`;)p!{I>!=i91Kvu%CcTpFbG~#s z0I-Q!2%yse0G$p1=yU);rvm^w9YDT2{aSt9g(nlR6Hg}KK0KL#5qzwG-5e4COyX(< z?8DVczWsikz7FcgyMTvcP744Z)vt2_IS#N~kmCT|f*c1}A;@un3zYqE?APo2h>i^a zkLuU}kYfNnf*b?5Sde1?s{}a)aEaiF*sm6Ro?`>D%xC`B^tHOq3cgl;NP!5S`_>KrWrEV1dR8~kcf zUOhkM*iQVqIE!SNt0&7IC!uU4Y&M2hIe2Fs+40VnbJ7K7G*VX@=?(9&G3ziM@&ASj zBJ1h!QEEayoZP<6UuwIx?o#I0cHYm)VxN~%%1gN_-!5*kv(;{Mza^=dlD(dDQ184# z`T4P+;Bal$?ve4F9Ke17VrS;yj8oCpr*Ijxj;d2R47ua$>ctxTH~ z(s>TlJN&kKozw*6jf>?R#LxZC?OLUFt(w1GP>W4;X1D7+zgQcfRc(Ia2{}(a%*YQt zTzEI_TIQGfbzygM9gk`)Q@e^y3dy|fV%wK?$%mi4-1Dy5b&1+_$^7lIO=R0wnAKk^ z=4?p16>3+hSu6eYio|0^a_8xw7d72n-uVod z6v|N>ROmtNOH}}DOQF_+o683`mk(|(AC8RsaAMrc>4j2}w`(;3pBbu4j;#%VOE;!h z0&I)Uhb=lEw&;A=qVr*k&W9~JAGYYcTe~s6lYBV6?gHTS+6lnvbsqqC+z5cJs@(v* za+3hOa{B=5Hm3Ul_{9#Q=(S^aNRTZ%tOc(TAudv{)x>kdo^K*&U%I=~G?6X}G}s8E z^Hh#)?bvis)A|zbXcs%JUk=H~ENugdj{lK|5((3hCeV{TGk4n?J#NLkkt zcJwD6e#5XlH4yI*AjXcuV5xdbO#t&)9=<;fI{I#0?qf92-1`UKM9IVSePxxu1PiKJOSEXUSTx)PD0Oz21 zBG%x8uLV${xpSq#hHHlOto1? zi3n#FHQ6dI6~sbXWt_t)Fq?CF=_!y{9)4LTL9~J##H0kJv-NVY`FTkjgi@!&Bs>&D zfcQ;5#E}~0oFq7OPP4Q6S|LnTk~k`g4vuIseyd}&PXmQRHTeiBhtd?4{2d^k~DiA0Jqtc5Sy0)VEPIGML7Vw8a)7}kktTF{vAR2^G1x6 zKRE)%F> z2m@r+17y|%WYz<8Bmv;ij{qG>0IX&_0oI`{0aB9y9Z3LOg?pbR+?wBMAT1Ht%V zcsW!3ct(qZ?X02|p`;ra*9@o^5u*(nSdyklO{gnY-GjoxhA?*{A$1ndoUEmEPHnJ& zoxcTk0zzjp^gfXtyK@U@LELJACu>jtrvIfK~Bnn6M}5D~^;nk@~c+0w8Bxl|m~L$PG<0Wd6A1FUjO!{}TZb|9B( zSYAb(?LaOKJCIAm4&>6X1GzNpKrRhCkW0f3pqR# z?Da6X-zp{XRH98uq#hv;Lh|qZMIZ9&{K5#+0_mN77+Wrf+%yTlc!f|Pgz&1ydC0Tl zu1t`xsHWpSXXWGNYsGPV=IppUA167_j?42Qa3L-$5AhB%dOSQRO^w1%QmxiZL0qf0 z?}@EcRP))C-y7mMEPHm`1~$m4EoRc$aaV*m2JydBBfKhS=g!;17WluD+Z;aP{yAIB zviofMIWOe_J69htMCUDAoMc}lxb@`M!$~SXRqK%=PDR@zMMa!^XqVO&iZ%>#?^k%D zsKC~!?Qt}HVqv!=xTz9$X5=NL>-cXDk+3g|4&%TUvpA0v6(4LApVmn#YC*O$9wsW9 z_M%#4l4wsT-_ucr5Nk@!pCu~Vt3)^{nr{-}&(;12I*<*=7G|9ep`1NkL)mVLT3u5o zX>(Bvm~u&_NR?E-R*LI$r_~cot8ENUX5~mvDHfOs0Tre|05gGrrd5DeAAn^jWxxtr z3(&L*khcsVZy7+V55NYdPQaB+oq(&DIsw;uPQ%Te7b>g*G!G`5>M>R(EIs(ST6Tgp zCk7x+LEFC+7ou#s!c*@B#e3`{GkKW{vguXW%P=x^(QrMXF3f9z861p5Z(kivufpt& zhp>$cdh;^;)E+qr;(+(*l;cn=E2ck(A*qaso<2`i4)-eZfN&}=rk1H$>QrW?PWXhg zL!|a;UaeTw`Wz1!Y7+|MeYjr0M{mdc*s2VbBQ{j-b8M-pDwyG_U23PgW@=Zq+R-RZ zHYJm)kSZFxM4ym&v{lV0d#s78F^o(2bP~InwE$&i&%IXbdkgV9p>uC5lkD(UG4-PC z2q|c*rtT>Fa=VMV&Fs#edW|KJ5X_(&-%`j)O#VCumO%viW02x*QY|i9YIy7hU?7QYn zek@88dYk6O4FHzxR|2q-uL58tU#pRXZ(JOz{zD;NGTh6Ttq4UlUZpuq;vU;}8d0c67hWWxev!vbW(0%XGiWWxev!ve5j zn*&k9v1#+vJBu<)@#yNc0VJaSUPS0|`L~c>ti8CHom^SJl;2QFvd0vR$iHYXoWGnq zw(D5AX~Eo(tX&wN>-E!TKx~~$GGue5uLS=r=R@S1<*v&LhLGF^Ecc7cv#Y};lx|c( z9$3g+xIEIGCCxBxp5iCYNuKHEgk;)G$!|HeoBy)jjf*fRi3!>x0bu;}1l5s957E#K z#nu7#1#Urx=#Z3xWx@K*UxM2Wi5CzhPW;HG z#EGXxBu)#03AP}ZVElT6yexQcv`*q#b{zuaDJ9nYk|P=I61Bf9#2s@*?c}@jDwzXG zqJ|?#q6Uzt0VHYwi5ftn2AJ(t3Zh1!L=7NO18hR30oQwO3C)!R`IMT=ooSXgtatJh zGs0(nCo{KM^60^>LN?7S?;uj?@aeLKRlkv(ucOU*u6n78^Sl*=mEu@^x4c@c!nv8# z%q>T(3YcZ^YdKDTpL02m2Gw#Lb*1GvI!w!PlQvs`YdLPxW{dN)m*XaFwhq6FlxFbT zI(!pfD5&d^ua4|wkgpdC9L&%`0=Zz;MR=IZQN@br)66CN`ph?@)PF6cQ`NWsOi|k zG3W~ZpEg_VF!JkP3}tDdSzzS|IPgYKy9D-F@TM%ZYgo zt0xL^`&@0cZrE3lH}qhnA2=*9qXD>mC{RPc05mTGq^tl^Rsbn0fRq(L$_jw8YU-XD z%~Ec9w9n8=Wo%C=oG`=5@n##2Y*4MtUB$Ufrh9nyS#&YW^!tEu_Jk(AEqFFxSqXwi z`DMWn03*yPjLy7=!=_hl%9owOl z9pP(@oHU2fXkn3A%AN~J^jiWeV=O8Tl!p+0{Hcd^FawdFIoIumQg&PfAdT6aJsVQ7 z1}J4ul}+QbnwBUHmj_N9kV@@RT`T%u@Ap>6!ZeO5SIti>VUK zlfI`3()ZL-`kqDveGibn2T0!or0)UJ_kbQI2!O5^0@jIL=5%Nd$QkWlY-;~{5#>tn zAx+bpSbg`z^^zR6({70W>SeUJj`!C&1!73LNt#npZCbtSvHFc4EG6Z%Q;qC@4)8?*)oW{6+m^{gX0~PElP#t(InjnRT*?#{ zsVnt^3N{8g4X*wz1ua?%T9kui(xT?w=>ed2R|By1)&aDLRVQ#xkbYp8{VVo4yVHnR zFz=k!@-&yrdnpr7_)IdnN(d#ACJD%2k>=4U9`j~Gc`ZT;j(Twjq#4dHB@=G)l5tay z@gi306%2lRx-!zBCFmv?Rv6CqTYO@<6%ZM*ar<*Av<5iSPFJv z1A7eA!xxSo{Ljs7i(A;T7Pg{EO10mUEgNre^exHk=3IQNPo{7E0HVu$KICJQ6w^F0 z_XdIvl25LQp>Oh7Hi;33j2k&gc9GrxtNvsfSLd2U8~!7HIrGG6h~Qt-5CLe20AvyY zX8dx^vC*81D$P|{c2q}m!V*6!W~X{=N9XJ%nlgmRE6QRm;u{f(b-c=_+E!t4#!OYT zMSSj%K~6Z~o@~mFTM=Aijj}gow2<|Urt$Ko431!{@FR!0t#Yb|wR&^CWwuIAGOh;S zE7hE}K$pKq-Xv5SgX5h39vdp=_L0BG3=Z@DIKIr_nB(tZjI=SoK(_k*XKAwkeccpX zq+!z}L9cRviraH%7>M$uHmyTcXd}gTPq7E=inF%QnD5c4cD6v+wlMJM5C$rPoK|u* z%?7G&PijEnU8b8QN%NngYj#L@-#+0|A4^$=)~uDRx+R}!{+D#iTS;7;J9<`da+ZFR zL&p^~!61{GV2T1Y%m;uL6aY;y07g%9Xv|C-X>-~*gUxBaXs{)Mj>YuoRvB$yEipGl z_7wqZ1nk5N{H)d&qw>?B@)4&q9z>Fo&HFF=Zg_sw#hPGk()G}#UbfAp&JcNAX{+TU z3z%v^$Ql7FFnef9yA3*jtb*fa!7;D4Y-}2Mx;KEx!wHQ8zBB-^zBM=N=9Wz^IHx5S zC|XQ(u{B$fPZ^(2f@Q`=9?u|!6v}4WncS$Opw12F@Fva?is8YX->24BauR^Uz)Ag@ z`bN0`T#D(Wb2Ic8p1q<}syP%iTg=`W7U75=TBhO|wrqy;AYHEfboFq!%8_AGIc6q8 z8l{^XWquSH*DkXiCy2W_UM;O?_9bGEMMa>dA(CT}42vPGOSm%Gs%KjxJ%$rXcV>=; z`f4$Y89+O*J=98et^=b~FoDt;yj@fEFviA%i>p5eW!vG1%+zMP*9YClW?XPx1TSUrgZ-kNvzPi`6XtRxX+UOru>nsq8wE$K#rwNs^WB&rawo z$~a_gDW^B-_rlW~MEZDon_Y=l%4R;B-Lu;L*(uA{q|%IzGrRPt+|x|d+}1g>;MB4# zyq`k_D)>wSE&4MQ?V?h%b?oz3@c~z>-=DBwoaXz*h#6jpKD1T0FK16q`ILddroCiE zY=T`{rkjK~91aY8$qqt^Ev)ryZTY)Th!i3f^s|eZvgjCA$X0|b7WAYvX47MW0fm;K zQPjW|R7cS^cu`6$s_~M@UpUuqQlb$pzlm}=`M@Mnlz3J-<|i}%NiIy)KVU}S@Oy?= zA)!{SYf!e~6mKC!4h7w!;$};9u+heDCa1pK%!Mh|O}fQPNGm1F@Pm!^kP=-b%F0K! zBL9{6uifr1(E5k@`EHD94gykC^i5hj!eaIZ5NYiB8})|@FV~{u4*h|OjyxnEk&5E3 zO|=_?O3FgN18%3Yt6z>|9F3bOHNwFX9-)ZWvtuBu-A0U=+_Pg$-_cETl3O6!j(8MGGFQ^!*= zh)GzW%u&kP?{`T( z@2Rq|--%_W^iX@O2owre z6@E-JR>J?t#o7R084agbtiCX_89d%FT4ogEj^m(@{HpQM1{y(r)!-W=r#Xn9-Al+G z{g+z(a@%khwtP1EbW>1$39V9ohWl&+bt6!%ep@6KP_x6FkA;b^%uA{aD%pFM)O;$7mw#n305lip@Rh-YA>NB;&}AgX?XY0Z|~QvP&Z zIPMEBQFKUz$zr)R!w{e~r}U4chNHX589rD>LhgdC-eJV4sb0v7=sOh>2+wirYWkL_ zQrCZ<=w@TE_WIJm4E!hA|4EZDRr}m`P-i$Zbc{WM`UUlk4Bv?1>=mlzfNsXQq`P{3 ziuH|ax1l1UqLuDf|8+os*>ia0)yJ;azl+KB7#$N85v*inAJ zo!qTnM2?=Ki+bdGWcN2(Sz0M87l1ymRhO&$Pv3Q0VQRL*B}28k44W%t)%&%)!XZ%W zNl@A}irKGezvzg=Z1aJ`VDuk z-Eb%P`VITmZrJAt8XUcEV0>_LV5jRJ9UFac*Vx_(x2u2m?!nP}-HtKh21X~N@qxV) zb5h*hzhih#id}n0$rbI=pV9u&=$^6h;b`K)i5+93_fWk&_6i2dvIs8 zExY~p=+^Ak=*C;OM3+UYNB545tciBqH#oA>-P1ofGO+V%H!;~ixp(3N7w>G(BX$n% zbfaUF&>C>N`&B*nfEsYMyLd-?l!wet6zIKknou!!R#I^=N=}!ichV2#>C>l6QLO(l z-NgJ){{(0MPhst+tNcv~FPy%RJ}4^i^o}U1?1)}V6KGlhut~+fPph<9|EvR>-_4X! z?4m+64XtL`${U|9oi3lQoNhBh&+(3bOc|;x2h~Dp#58F&;o>H84k!vn(Eg|fAz^6m z*_r0V6CLY6%d51)1ZOC&j@kp|5Y~!Os{hsh0zYTBPgM!^sh#dPee0MT7#|-ScS!Hp z_^$rR!LiZk#*vYMd;3S+-49L_;Hb;a!SR6|lVjr#wqkaVkL?(kn25G0#lXJ7iOGrRWBnt02i)Y?m>cOIzn33> zR?hxO3V(37>ft7K_wN{RBLnwLI;+F|gS6tdy?ou{b`9(jeVQ@0cbrsr@11xsx^Z+8 zX6)T9J;A7+9N6V{^pA|pq#|VZ*m(c=;K+k+?`Z%1j9zQ>jidMXk1%2xt;5lW#>XZm z*6$opac*RAbeR18JJ*kmlF3>aNJrNYGCcdx*yt!t7th4gNxH!89vB#(#k$GC zT~u#u@8sOX_egg|TgN6Q-Qa}VIrhM4>tpZe_(1=T`>dwf&YcjRpt~IzqQ3F5;ek=N zdk~J@c=>I4=QjHzZv%Yo580x=k+B`abf>j#9Lb;!GyNCBa}k*7#2Y7&h?&nj2KG$` zMi|pK?;F^mzC-r%;+x#0SNHCcV$N~|ZqhvK@v+f+quU?cb@$lF;0|N58yOqhZSpbN zzbh9DCAtIo*wsJ!pc~swL&bk_JOgz%nM^XcI+LGZf8~)$j8`@vHKZ3 z8e!YUVZSCY`glNtWzNXj(a%_APKmbc9Swq{X=HHM?vVlUGW6CiM4+D`!$6>E>ccQ- zZe*ZN4&E~e*>MKQ{iMD=^tFieGfg2$yVMXHXfvG3Q%N=@nfV|K$tNVrHp(WVTVRB{ zuYbbbJwR_v4A3UZ*fu^kIksbLB=3K5G_PS8@H6a}+}=&J=F6t8nSiGQlT>tKa&R=L zm%KkHIG4~5=MA3en^u_*j3Xx2eA6H~wN|t>yd@EHf+zOwMjMEww_6ng5oMT(xpmH0 zEBihDyQD+rrfrVR`65jbZJef&@MXq z?!EWigE$0TI@95Ki%~V$I#O9PTu`lM0zcG0YONd?WtiN9eszJvZsNYZlcuI-W`+I{ z#B1k+>g3(yWA{o?%~E``Lm3&2G#i`u^^fjkRD2Y9Ww6XBv5)K>V=frjw_{*nCzYcq z=*_{syXfV;SZCj8-JS>8jPYS6w88tvcDsHiCA2Y_KAOvq8y*OqHN$|L36UTV?AvWR zu=#C*%?L%W?u-=p0Exy2hKxn(#fgD&n~0)lSM+f7V-aiK7Pbh;(_>EFuT@9S4C6l?Ej~*}l3Rw=%p>bYL#8_VBX{`vGr)03>%_3!+s8l8Xs`9h2O6Gd(MQ`H~+J3ne~ zCq8X2M{#!>E0?IPL}WE8DNml$Qs#d;n;%f_P<#hQZq%mWwury2_-48mthKNxS|r^5 zw1r^)&W@jz6tq6;vkSbSKT%r<=I`wISxG?`&iXWt*q^o#%wPV$oFvPa>+k&k=ENKG z=1-NOKJZ{{@o>M_-tH|g%b}Jd(V4d_waVDLAUQ6ePD2O=k~h? z?j0PxY9@GmU?#Y4(V3}H$sd{t%+nHhLuV_c&9gi^RP5S5H>}h#6TlP^UD<}l-nnJtne|fm6KmUdN#$d@?;Tj% cHH*&rvE7qvm&o*A4_D1hUoZ1(1WV!n2J&50Q~&?~ delta 9769 zcmbVS3y@URnLg*-d;4{}r|%5&VtQsc_fF4?hYX4X0)aWL!$U+ySJX0EBN`IzA>shF z;sd%_26ooRX7ozQ`bdVD0NKEb8DrQ)lgVy&WsS9E?QU(cRVf#%QmI&4hE$m?Yr``8 z{pa3pdWQH&pt$#*^Z)1n|MS1kX^x*0AD`o6-{e1j`Nbdd&i$fT)@8{+qZ2W+_`=8{X1Cy>}e=F@7ca(^Opv9Y~8kV z2V3pmX?WTCnh+y|-Fx4?JDIWv2k+Uk`ToJXHt*PCSpFeno17P~;!GKKtY+=nrj^Z$ zE=zJF@wadA<-gk?<`k?w{?)00TSTAeGx`*BEdyVYyAf&08a0JK(H+;O7>)974WU*3 zm5~LEd6tQ_G%YPP#fNE(ul6-5;a^t0M%nq+M+Qw^Y37GYG zimSM-#7>tR(3dpxU4o?y$ZbMy)wL$YY{f}Z8*+{D5^b7F`loA0x$tkO`%xX&J9Rk} z5>v~z=GE7p88Q7i^%C&D`X(ski}j1vS7HBAHE!gAhV3Oe#!1DlhY(N@X*Ui9h3ZX6 z8-{y$*sGE@K8urVIO96nYz5KhuWneqzNR)+TT@e0d+|>deHx{4O>N8~xP^&0H$`o2 zqLvZd2EU|?b{pe{n^j{)$oiiRt@lVP0$$NBF>c73Ucr)crj1(5qvJ~qmTV~xmvfeE zE{~Qmmdun#Lz*aHw!0Ywscf%xE~gprOZFqlIPvD5^geS*s>1(t~cdnbL>WPXBkm^$cspjjt;Ay zkZ3d@YLtjtS|3)Rh=sG;3t0YV@cw8HBJB3EOQ0usK~f`Vt2wIZ48w_O@=5H3Ce>5; z&?pcR@dG8eX#`C+SHvIZCJ@ZJ9VDbCfp>*`z)BRoj$C5T?xV^p2}*(c4NlT_g!{K; z#{eR)!~gy4tG-$TM&u&Wk<}Tu3cN1vKE+^TTgHK0=S2{HH5kb3fUJ{|p}c3o1>wRgmvINWqEzcsL0oSZf(Ex2lbDoMdn!B{(U zG68m^LUN|0&>Dt^D=AZoNyq6(3M^8rSDY6zDzpB+<`uCPFQM4lRF(fmbNemv02^`w zTUtTNu(YC?R$UBYdWrrN2c3^ZUM3u@mytZoZLP;_7)V%HLZD>jzuw{%KrjWumP|nf zRuWNHEwQJyo;93`(q4`sE`mM5Vp_}(3{ilQR*fDdJ8F}XS&N1Jh#>CPt7l0y8O3J! zN4*s5?KJd{w6+%7Wyi=shEyo^r;UVk zU4;yjQ&VhTC=Y$#0O?9(uoQtX>?UNrGV*Q&9sDcm?lFO z;ZN%C@IV+#RGN%>l4ia~Ggd|>GRn}4EGjxAu{lg)BO9aUOc(p|8FD8`?gW_pkz!;2 zM0=g~2c4(S!lybU+(3O`tEkCk9YVFw{|A2RAmMnRU%#P!eyanunurrphD{$cnzwdfwHUWsaZlh zisnrB5$KOlou%Ve$+&AL&5+Gns3}f9l&9BvRDPLo<6yDr@(lv5;pF8FXgkkbzL;}A zbw%4|5~o7%$U~(K;xi8hNm;x7llP`of|#fvVIfr}>EGI56J3kv&qGZOF(6)xY?Lj5 z9!Y3jbS_%=bGuK=Ow+*~$R)0&EN%L~ymHA66px*O##Ls>wMy~6cakAU07I-s)j4$T zlyj(J7mJesB8ghT?hzRua~H{~+c?MFPJd^w=gh?QkM*{4$A3dVf7RPsKnhmlAc{pN z(SURtr}%W@QGHqfU|fTbT>>r!tH(6>&?R6xu3$AXFP@__!O3o8S0M8Jkod_Z&4r64 z0vjysr`H&Ii5DqeF-mhylJR zRGTU*Rp6-*ED9X#t-=iZpd_%(pFrsv!BW&NShzE{qExTigL>s?qYcW`^~mmr zmQ!|*e`a=H!l3rb%vzBmlBh|P8g+J$sbY}bLk0`J@c9_r_bCj(83*aaMJBdPP|9bh zPk~q{9~BMlpV}B!?5P@MqjFE>juc7@Szg*me-6GgE7ro9+YXykeOSRc7o~X_d&Vf& z34gDe_Z2FgNm-ruV(0~MuA#nTeTu=rRf-yDuL?o}63BYapr#+U73f2#o2SP*&!Ot- z`~kZYdf4;t>2J|?$5g2OY1JhSA<8-e=fLg(U;ojo$?nO|WY9C>ZBhS)eVqj^fQU^bg;(J%u#=S7_XvF2$3Ezj4$2+Z#NF ztA@hrjI+e{1U`WBdmPXM6j)WLpEBG=*^Y;Zi(%J%b;|M>OaRQ05-~cUGjcC6AD5{N zYu2O;fBDVm71{&T!0d$EnvxHTm6`~;Y|Z|oa#G7ys}heZE@ zttnyml(EkT*gg3YwmL!L&xPRs2*C@SB?uQavYC}I6};S z^?{Ba^jW-1T;nkqiqowJ4(Zp>+tgPe)oAgt2V0?a$elu09i8<9eUI)${L-fgrZhIs*q4pBSh^K+q5W%&xhO7=R*~9_D3e zd_i*zlYZladkgU#&+Vn;SLDN)9Cwzh+t945(?|_~V2le~7#_>%whit9sEt#TA_~*U z4uxb10b2qiBJ^~ChGYerIa&mEL6j1&-hf#ED9<<9>Lh|zKfr_Tap9d19x*F(-cWSVpXQ$J9CB;m{$3 z8TmSPHVNU$f4D2O#fmX)n__HZ#x*sH3#nXY=qjL!U&metM_hy6dq>)9L4*7g4uT&r zJVUSWPmRncK;AXNr7#$lj{5&I(kbw2-M^?n3lTASTE^7lgieu9jxkmqV39U1WH?QB zKqV((U&D9qV|2fhY>8lPLXE#OrUAzYiS`vZ#G4e{ETe6vz6BU%IJg3^3sAP$sYZdT z#p`D*foA%@RTBY_k8|fWc6>#^86+ELXSD?WuqLQ@aZw<>K&*>`Y&1n&}n8 z1iR1x9mbTgI>p-<+JHkv+#jHH;+h)CAfm=GCSiO`Sy19rthEX?TON>#nIPP)+ zdqmW00h))q^fsh$pe~#YreqvxPxk{UWl{}^sFOqm^f|sy1z2v}|B5dQ@m@ifhFl*Z zWCE%{?}mGAX;(*dG+<~WHQ6!-N--j=-sW_K2>v<^O-NX{MN*?Ye;ji$Ty8d?yu6)H)$&*3zSy z3LKOG!WdB-6GBZ-&`iuTGzR(|W9YczG}l#)8_j^mQv9UHY*LH*pv160g>DdNe=@_> z)M{PAka(PXK^JWOBjcyAD17#&KAFdv=RX&ZPh;T*iu(i}=uWSk7(ck1H|{rhyfCd% z?7=w|jbhEyH`dK;`>ZN141eF#2kUi5#GFP}s;Rh2SKMEDaG(ZdGJ-n@ialn3D7aPd zGrPao1J#jA4Rr`tmAZuEiAx%%Drjs`2w|GXXC_wJh$7(ydpUDsp{#HUPC`bZS!CL>sf^$sL-0>H$1Km z1z1p>`UUbe1+!3Rg0Ipf5-_JNQV))NmA2JsAi%Rb!%ql8cNGRI4DJXuRC_)3!Vr@< zq3Q>mt}Rg|R{RoH^ou&N;+hpM9|B&&>7;^O+7l8jsQE8{ZO$CL%e65qv;->^_KAUS z9OX5YXM@@ySHR@)!*iR1Y01gb)Uy!`--6{^8+O zT>AfeSk~g_4~6Rr6PFb9h)Kg~o4D+oF5h&BbNnd4j-!F&CtL&VJQ=>wT?DW9)FVLP5HKS;w{M%k!)~(wq?TK9?sk5$n9COY>-OK3D zx&Zs{Uu-T^V&lJZQ2%H@N)}SbgA`hdc6Am4I9a>j<+LTznB#gZn4T}`H$X^nGF+6P zVEHJ}0g%oGU!_S}u7iaYR04rX(L&@^0?~rQGxn#s(f&p`=+a*`4p|Yf9Kp(}1mgln zNSr>hLcahp&&L=Pg*gmPX3obrS`_FBqgTZF7?en@8b;$kPoB5&dT(qH{R?#m0rA2G zCRh_mqj~XM;R45meLl|5$A!HKnwD^@tu9%WPF$oMC_}wovv2Wlee!3XXi8vOFM?L&h*c5dFeCF(E!!N>m( D;kM9d diff --git a/examples/run-example.ts b/examples/run-example.ts index 4ef23dcbb..25cf5784b 100644 --- a/examples/run-example.ts +++ b/examples/run-example.ts @@ -24,6 +24,8 @@ const repoRoot = resolve(dirname(new URL(import.meta.url).pathname), ".."); // need the path must handle null explicitly. const coreutilsWasm = tryResolveBinary("programs/coreutils.wasm"); const dashWasm = tryResolveBinary("programs/dash.wasm"); +const shWasm = tryResolveBinary("programs/sh.wasm"); +const shellWasm = dashWasm ?? shWasm; const grepWasm = tryResolveBinary("programs/grep.wasm"); const sedWasm = tryResolveBinary("programs/sed.wasm"); const gitWasm = tryResolveBinary("programs/git/git.wasm"); @@ -77,8 +79,8 @@ const builtinPrograms: Record = { "echo": echoWasm, "/bin/echo": echoWasm, "/usr/bin/echo": echoWasm, - "sh": dashWasm, - "/bin/sh": dashWasm, + "sh": shellWasm, + "/bin/sh": shellWasm, "dash": dashWasm, "/bin/dash": dashWasm, "grep": grepWasm, diff --git a/host/src/browser-kernel-worker-entry.ts b/host/src/browser-kernel-worker-entry.ts index 916ae4686..f2414e1b9 100644 --- a/host/src/browser-kernel-worker-entry.ts +++ b/host/src/browser-kernel-worker-entry.ts @@ -1072,6 +1072,7 @@ async function handleFork( maxAddr: childLayout.maxAddr, mmapBase: childLayout.mmapBase, }); + kernelWorker.inheritProcessSharedMappings(parentPid, childPid); const forkBufAddr = threadFork ? threadFork.forkBufAddr diff --git a/host/src/generated/abi.ts b/host/src/generated/abi.ts index 8537e5976..515696b9c 100644 --- a/host/src/generated/abi.ts +++ b/host/src/generated/abi.ts @@ -1,7 +1,7 @@ /* GENERATED by `cargo xtask dump-abi`. Do not edit by hand. */ /* Regenerated by scripts/check-abi-version.sh; drift is a CI failure. */ -export const ABI_VERSION = 15 as const; +export const ABI_VERSION = 16 as const; export const ABI_CUSTOM_SECTION = "wasm-posix-abi" as const; export const ABI_KERNEL_EXPORT = "__abi_version" as const; diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index c6e53546d..134ea095e 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -102,6 +102,7 @@ const EEXIST = 17; const ENAMETOOLONG = 36; const ETIMEDOUT = 110; const EINTR_ERRNO = 4; +const SHM_RDONLY = 0o10000; /** Syscall numbers for sleep/delay */ const SYS_NANOSLEEP = ABI_SYSCALLS.Nanosleep; @@ -180,12 +181,17 @@ const SYS_MREMAP = ABI_SYSCALLS.Mremap; const SYS_MSYNC = ABI_SYSCALLS.Msync; const SYS_WRITE = ABI_SYSCALLS.Write; const SYS_READ = ABI_SYSCALLS.Read; +const SYS_FSTAT = ABI_SYSCALLS.Fstat; const SYS_PREAD = ABI_SYSCALLS.Pread; const SYS_PWRITE = ABI_SYSCALLS.Pwrite; +const SYS_SENDFILE = ABI_SYSCALLS.Sendfile; const SYS_SEND = ABI_SYSCALLS.Send; const SYS_RECV = ABI_SYSCALLS.Recv; const SYS_SENDTO = ABI_SYSCALLS.Sendto; const SYS_RECVFROM = ABI_SYSCALLS.Recvfrom; +const SYS_FSYNC = ABI_SYSCALLS.Fsync; +const SYS_FDATASYNC = ABI_SYSCALLS.Fdatasync; +const SYS_FTRUNCATE = ABI_SYSCALLS.Ftruncate; const SYS_SENDMSG = ABI_SYSCALLS.Sendmsg; const SYS_RECVMSG = ABI_SYSCALLS.Recvmsg; const SYS_ACCEPT = ABI_SYSCALLS.Accept; @@ -197,6 +203,10 @@ const MSG_DONTWAIT = 0x0040; /** mmap flags */ const MAP_SHARED = 0x01; const MAP_ANONYMOUS = 0x20; +const PROT_WRITE = 0x02; +const O_RDONLY = 0; +const O_RDWR = 2; +const FILE_PAGE_SIZE = 4096; /** Syscall numbers for scatter/gather I/O */ const SYS_WRITEV = ABI_SYSCALLS.Writev; @@ -217,6 +227,13 @@ const SYS_MQ_TIMEDSEND = ABI_SYSCALLS.MqTimedsend; const SYS_MQ_TIMEDRECEIVE = ABI_SYSCALLS.MqTimedreceive; const SYS_CLOSE = ABI_SYSCALLS.Close; +const SYS_DUP = ABI_SYSCALLS.Dup; +const SYS_DUP2 = ABI_SYSCALLS.Dup2; +const SYS_DUP3 = ABI_SYSCALLS.Dup3; + +const F_DUPFD = 0; +const F_DUPFD_CLOEXEC = 1030; +const F_DUPFD_CLOFORK = 1028; /** IPC constants (must match musl) */ const IPC_64 = 0x100; @@ -236,6 +253,8 @@ const EAGAIN_RETRY_MS = 1; /** Profiling: enabled via WASM_POSIX_PROFILE env var. Zero-cost when disabled. */ const PROFILING = typeof process !== 'undefined' && !!process.env?.WASM_POSIX_PROFILE; +const THREAD_TRACE = typeof process !== "undefined" && !!process.env?.KERNEL_THREAD_TRACE; +const EXIT_TRACE = typeof process !== "undefined" && !!process.env?.KERNEL_EXIT_TRACE; /** Read-like syscalls that may block on pipe/socket data */ const READ_LIKE_SYSCALLS = new Set([ @@ -774,6 +793,7 @@ export class CentralizedKernelWorker { /** Pending pselect6/select retries — keyed by channelOffset for per-thread tracking */ private pendingSelectRetries = new Map>(); private lockTable: SharedLockTable | null = null; - /** Per-process shared memory mappings: pid → Map */ - private shmMappings = new Map>(); + /** Per-process SysV shared memory mappings. */ + private shmMappings = new Map>(); + /** Monotonic version per SysV segment, bumped when an attachment publishes writes. */ + private shmSegmentVersions = new Map(); /** PTY index → pid mapping (for draining output after syscalls) */ private ptyIndexByPid = new Map(); @@ -1554,6 +1576,8 @@ export class CentralizedKernelWorker { // Clean up network listeners/endpoints for this process this.cleanupUdpBindings(pid); this.cleanupTcpListeners(pid); + this.releaseAllSharedMappingsForProcess(pid); + this.releaseAllSysvShmMappingsForProcess(pid); // Clean up pending poll retries this.cleanupPendingPollRetries(pid); @@ -1667,6 +1691,8 @@ export class CentralizedKernelWorker { // Clean up network listeners/endpoints for this process this.cleanupUdpBindings(pid); this.cleanupTcpListeners(pid); + this.releaseAllSharedMappingsForProcess(pid); + this.releaseAllSysvShmMappingsForProcess(pid); // Clear the killed-but-not-yet-reaped guard for this pid; if the // pid is later reused for a fresh fork+register, the new process // gets its own reaping decision. @@ -2589,8 +2615,6 @@ export class CentralizedKernelWorker { this.kernel.bos.primeBindFromSab(channel.pid, boId, channel.memory); } } - this.assertKernelStackStage("after mmap backing population", kernelStackTrace, channel, syscallNr, origArgs); - // --- msync: flush MAP_SHARED regions back to file --- if (syscallNr === SYS_MSYNC && retVal === 0) { this.flushSharedMappings(channel, origArgs); @@ -2800,11 +2824,8 @@ export class CentralizedKernelWorker { } } - if (options.syncSharedMappings !== false) { - const includeAnonymous = this.syscallSynchronizesAnonymousSharedMemory(syscallNr); - this.syncSharedMappingsFromProcess(channel, includeAnonymous); - this.refreshSharedMappingsToProcess(channel, includeAnonymous); - } + this.synchronizeSharedMappingsForSyscallBoundary(channel, syscallNr); + this.synchronizeSysvShmMappingsForSyscallBoundary(channel); // Clear handling flag (channel is done — poller can pick it up for next syscall) channel.handling = false; @@ -3373,15 +3394,13 @@ export class CentralizedKernelWorker { for (const [, entry] of selectEntries) { if (!this.processes.has(entry.channel.pid)) continue; - // Cancel both setTimeout and setImmediate handles (one will be a no-op) - clearTimeout(entry.timer); - clearImmediate(entry.timer); + this.clearSelectRetryTimer(entry); // Re-dispatch to the right handler — SYS_SELECT and SYS_PSELECT6 have // different time-struct shapes (timeval vs timespec). if (entry.syscallNr === SYS_SELECT) { - this.handleSelect(entry.channel, entry.origArgs); + this.handleSelect(entry.channel, entry.origArgs, entry.deadline); } else { - this.handlePselect6(entry.channel, entry.origArgs); + this.handlePselect6(entry.channel, entry.origArgs, entry.deadline); } } @@ -3414,6 +3433,15 @@ export class CentralizedKernelWorker { } } + private clearSelectRetryTimer(entry: { timer: any; timerKind?: "timeout" | "immediate" | "none" }): void { + if (entry.timerKind === "none" || entry.timer == null) return; + if (entry.timerKind === "immediate") { + clearImmediate(entry.timer); + return; + } + clearTimeout(entry.timer); + } + /** * Remove a process's entries from pendingPipeReaders. * Called during process cleanup. @@ -3568,8 +3596,7 @@ export class CentralizedKernelWorker { // 3) Select/pselect retry timer. const selEntry = this.pendingSelectRetries.get(target.channelOffset); if (selEntry && selEntry.channel === target) { - clearTimeout(selEntry.timer); - clearImmediate(selEntry.timer); + this.clearSelectRetryTimer(selEntry); this.pendingSelectRetries.delete(target.channelOffset); this.completeChannelRaw(target, -EINTR_ERRNO, EINTR_ERRNO); this.relistenChannel(target); @@ -4217,7 +4244,7 @@ export class CentralizedKernelWorker { * own code is `select(0, NULL, NULL, NULL, &tv)` (mysys/my_sleep.c) — the * pure-sleep case, fast-path'd to a setTimeout. */ - private handleSelect(channel: ChannelInfo, origArgs: number[]): void { + private handleSelect(channel: ChannelInfo, origArgs: number[], existingDeadline?: number): void { const FD_SET_SIZE = 128; const nfds = origArgs[0]; const readPtr = origArgs[1]; @@ -4252,20 +4279,26 @@ export class CentralizedKernelWorker { this.completeChannel(channel, SYS_SELECT, origArgs, undefined, 0, 0); return; } - const finite = timeoutMs > 0; + const deadline = existingDeadline ?? (timeoutMs > 0 ? Date.now() + timeoutMs : -1); + if (deadline > 0 && Date.now() >= deadline) { + this.completeChannel(channel, SYS_SELECT, origArgs, undefined, 0, 0); + return; + } + const finite = deadline > 0; const timer = finite ? setTimeout(() => { this.pendingSelectRetries.delete(channel.channelOffset); if (this.processes.has(channel.pid)) { this.completeChannel(channel, SYS_SELECT, origArgs, undefined, 0, 0); } - }, timeoutMs) + }, Math.max(deadline - Date.now(), 1)) : (null as any); this.pendingSelectRetries.set(channel.channelOffset, { timer, + timerKind: finite ? "timeout" : "none", channel, origArgs, - deadline: finite ? Date.now() + timeoutMs : -1, + deadline, needsSignalSafeWake: false, syscallNr: SYS_SELECT, }); @@ -4345,7 +4378,11 @@ export class CentralizedKernelWorker { this.completeChannel(channel, SYS_SELECT, origArgs, undefined, 0, 0); return; } - const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : -1; + const deadline = existingDeadline ?? (timeoutMs > 0 ? Date.now() + timeoutMs : -1); + if (deadline > 0 && Date.now() >= deadline) { + this.completeChannel(channel, SYS_SELECT, origArgs, undefined, 0, 0); + return; + } const retryFn = () => { this.pendingSelectRetries.delete(channel.channelOffset); if (!this.processes.has(channel.pid)) return; @@ -4353,13 +4390,13 @@ export class CentralizedKernelWorker { this.completeChannel(channel, SYS_SELECT, origArgs, undefined, 0, 0); return; } - this.handleSelect(channel, origArgs); + this.handleSelect(channel, origArgs, deadline); }; - const finite = timeoutMs > 0; + const finite = deadline > 0; const remainingMs = finite ? Math.max(deadline - Date.now(), 1) : 50; const timer = setTimeout(retryFn, Math.min(remainingMs, 50)); this.pendingSelectRetries.set(channel.channelOffset, { - timer, channel, origArgs, deadline, needsSignalSafeWake: false, + timer, timerKind: "timeout", channel, origArgs, deadline, needsSignalSafeWake: false, syscallNr: SYS_SELECT, }); return; @@ -4368,7 +4405,7 @@ export class CentralizedKernelWorker { this.completeChannel(channel, SYS_SELECT, origArgs, undefined, retVal, errVal); } - private handlePselect6(channel: ChannelInfo, origArgs: number[]): void { + private handlePselect6(channel: ChannelInfo, origArgs: number[], existingDeadline?: number): void { const FD_SET_SIZE = 128; const processMem = new Uint8Array(channel.memory.buffer); const kernelMem = this.getKernelMem(); @@ -4492,7 +4529,11 @@ export class CentralizedKernelWorker { return; } - const deadline = timeoutMs > 0 ? Date.now() + timeoutMs : -1; + const deadline = existingDeadline ?? (timeoutMs > 0 ? Date.now() + timeoutMs : -1); + if (deadline > 0 && Date.now() >= deadline) { + this.completeChannel(channel, SYS_PSELECT6, origArgs, undefined, 0, 0); + return; + } // pselect6 with a non-null sigmask pointer has the same late-signal // race as ppoll. See scheduleWakeBlockedRetriesDeferred. const needsSignalSafeWake = maskDataPtr !== 0; @@ -4502,27 +4543,30 @@ export class CentralizedKernelWorker { // With infinite timeout: block until signal (wakeAllBlockedRetries). if (nfds === 0) { if (timeoutMs > 0) { + const remainingMs = Math.max(deadline - Date.now(), 1); const timer = setTimeout(() => { this.pendingSelectRetries.delete(channel.channelOffset); if (this.processes.has(channel.pid)) { this.completeChannel(channel, SYS_PSELECT6, origArgs, undefined, 0, 0); } - }, timeoutMs); + }, remainingMs); this.pendingSelectRetries.set(channel.channelOffset, { - timer, channel, origArgs, deadline, needsSignalSafeWake, syscallNr: SYS_PSELECT6, + timer, timerKind: "timeout", channel, origArgs, deadline, needsSignalSafeWake, syscallNr: SYS_PSELECT6, }); } else { // Infinite timeout with nfds=0: wait for signal delivery. // No timer — wakeAllBlockedRetries will trigger the retry. this.pendingSelectRetries.set(channel.channelOffset, { - timer: null as any, channel, origArgs, deadline: -1, + timer: null as any, timerKind: "none", channel, origArgs, deadline: -1, needsSignalSafeWake, syscallNr: SYS_PSELECT6, }); } return; } - // For finite timeout with actual fds, track the deadline + // For finite timeout with actual fds, track the deadline. State changes + // wake this early through wakeAllBlockedRetries; otherwise the timer is + // the timeout/fallback retry. const retryFn = () => { this.pendingSelectRetries.delete(channel.channelOffset); if (!this.processes.has(channel.pid)) return; @@ -4530,11 +4574,14 @@ export class CentralizedKernelWorker { this.completeChannel(channel, SYS_PSELECT6, origArgs, undefined, 0, 0); return; } - this.handlePselect6(channel, origArgs); + this.handlePselect6(channel, origArgs, deadline); }; - const timer = setImmediate(retryFn); + const retryMs = deadline > 0 + ? Math.max(1, Math.min(deadline - Date.now(), 50)) + : 50; + const timer = setTimeout(retryFn, retryMs); this.pendingSelectRetries.set(channel.channelOffset, { - timer, channel, origArgs, deadline, needsSignalSafeWake, syscallNr: SYS_PSELECT6, + timer, timerKind: "timeout", channel, origArgs, deadline, needsSignalSafeWake, syscallNr: SYS_PSELECT6, }); return; } @@ -6118,7 +6165,7 @@ export class CentralizedKernelWorker { /** * Read a null-terminated string from process memory at the given pointer. */ - private readCStringFromProcess(mem: Uint8Array, ptr: number, maxLen = 4096): string { + private readCStringFromProcess(mem: Uint8Array, ptr: number, maxLen = 1024 * 1024): string { if (ptr === 0) return ""; let len = 0; while (ptr + len < mem.length && mem[ptr + len] !== 0 && len < maxLen) { @@ -6503,6 +6550,10 @@ export class CentralizedKernelWorker { return; } this.hostReaped.add(exitingPid); + // Publish process-owned shared-memory writes before waking waiters. A + // parent returning from waitpid must see a child's MAP_SHARED updates. + this.releaseAllSharedMappingsForProcess(exitingPid); + this.releaseAllSysvShmMappingsForProcess(exitingPid); this.notifyParentOfExitedProcess(exitingPid); // Complete the channel so the worker unblocks from Atomics.wait(). @@ -6537,7 +6588,8 @@ export class CentralizedKernelWorker { this.notifyParentOfExitedProcess(exitingPid); // Clean up per-process state - this.sharedMappings.delete(exitingPid); + this.releaseAllSharedMappingsForProcess(exitingPid); + this.releaseAllSysvShmMappingsForProcess(exitingPid); // Do NOT complete the channel — the worker is blocked on Atomics.wait // and waking it would cause the C code to continue executing. @@ -7108,14 +7160,13 @@ export class CentralizedKernelWorker { // 3. Pending select/pselect6 retries for (const [key, selectEntry] of this.pendingSelectRetries) { if (selectEntry.channel.pid !== targetPid) continue; - clearTimeout(selectEntry.timer); - clearImmediate(selectEntry.timer); + this.clearSelectRetryTimer(selectEntry); this.pendingSelectRetries.delete(key); if (!this.processes.has(targetPid)) continue; if (selectEntry.syscallNr === SYS_SELECT) { - this.handleSelect(selectEntry.channel, selectEntry.origArgs); + this.handleSelect(selectEntry.channel, selectEntry.origArgs, selectEntry.deadline); } else { - this.handlePselect6(selectEntry.channel, selectEntry.origArgs); + this.handlePselect6(selectEntry.channel, selectEntry.origArgs, selectEntry.deadline); } } } @@ -7762,97 +7813,6 @@ export class CentralizedKernelWorker { } } - /** - * Flush MAP_SHARED regions that overlap the msync range back to the file. - * Reads from process memory and writes to the file via pwrite. - */ - private flushSharedMappings( - channel: ChannelInfo, - origArgs: number[], - ): void { - const syncAddr = origArgs[0] >>> 0; - const syncLen = origArgs[1] >>> 0; - const pidMap = this.sharedMappings.get(channel.pid); - if (!pidMap || pidMap.size === 0) return; - - const syncEnd = syncAddr + syncLen; - - for (const [mapAddr, mapping] of pidMap) { - const mapEnd = mapAddr + mapping.len; - // Check overlap - if (mapAddr >= syncEnd || mapEnd <= syncAddr) continue; - - // Compute overlap region - const flushStart = Math.max(syncAddr, mapAddr); - const flushEnd = Math.min(syncEnd, mapEnd); - const flushLen = flushEnd - flushStart; - if (flushLen <= 0) continue; - - // File offset for the flush region - const fileOffsetBase = mapping.fileOffset + (flushStart - mapAddr); - - // Read from process memory and write to file via pwrite - this.pwriteFromProcessMemory( - channel, mapping.fd, flushStart, flushLen, fileOffsetBase, - ); - } - } - - /** - * Write data from process memory to a file via kernel pwrite syscalls. - */ - private pwriteFromProcessMemory( - channel: ChannelInfo, - fd: number, - processAddr: number, - len: number, - fileOffset: number, - ): void { - const handleChannel = this.kernelInstance!.exports.kernel_handle_channel as - (offset: KernelPointer, pid: number) => number; - const kernelView = new DataView(this.kernelMemory!.buffer, this.scratchOffset); - const kernelMem = new Uint8Array(this.kernelMemory!.buffer); - const dataStart = this.scratchOffset + CH_DATA; - - let written = 0; - while (written < len) { - const chunkSize = Math.min(CH_DATA_SIZE, len - written); - - // Copy chunk from process memory to kernel scratch data area - const processMem = new Uint8Array(channel.memory.buffer); - kernelMem.set( - processMem.subarray(processAddr + written, processAddr + written + chunkSize), - dataStart, - ); - - // Set up pwrite syscall in kernel scratch: - // SYS_PWRITE (65): (fd, buf_ptr, count, offset_lo, offset_hi) - const curOffset = fileOffset + written; - kernelView.setUint32(CH_SYSCALL, SYS_PWRITE, true); - kernelView.setBigInt64(CH_ARGS + 0 * CH_ARG_SIZE, BigInt(fd), true); - kernelView.setBigInt64(CH_ARGS + 1 * CH_ARG_SIZE, BigInt(dataStart), true); - kernelView.setBigInt64(CH_ARGS + 2 * CH_ARG_SIZE, BigInt(chunkSize), true); - kernelView.setBigInt64(CH_ARGS + 3 * CH_ARG_SIZE, BigInt(curOffset & 0xffffffff), true); - kernelView.setBigInt64(CH_ARGS + 4 * CH_ARG_SIZE, BigInt(Math.floor(curOffset / 0x100000000) | 0), true); - kernelView.setBigInt64(CH_ARGS + 5 * CH_ARG_SIZE, BigInt(0), true); - - this.currentHandlePid = channel.pid; - this.bindKernelTidForChannel(channel); - try { - handleChannel(this.toKernelPtr(this.scratchOffset), channel.pid); - } catch { - break; - } - this.currentHandlePid = 0; - - const bytesWritten = Number(kernelView.getBigInt64(CH_RETURN, true)); - if (bytesWritten <= 0) break; - - written += bytesWritten; - if (bytesWritten < chunkSize) break; - } - } - /** * Flush MAP_SHARED regions that overlap the msync/munmap range. */ @@ -8154,13 +8114,66 @@ export class CentralizedKernelWorker { const pidMap = this.sharedMappings.get(pid); if (!pidMap) return; - const unmapEnd = addr + len; - for (const [mapAddr, mapping] of pidMap) { + const alignedLen = Math.ceil(len / WASM_PAGE_SIZE) * WASM_PAGE_SIZE; + if (alignedLen <= 0) return; + const unmapEnd = addr + alignedLen; + + for (const [mapAddr, mapping] of Array.from(pidMap.entries())) { const mapEnd = mapAddr + mapping.len; - // Remove if fully contained in unmap range - if (mapAddr >= addr && mapEnd <= unmapEnd) { + + const overlapStart = Math.max(addr, mapAddr); + const overlapEnd = Math.min(unmapEnd, mapEnd); + if (overlapStart >= overlapEnd) continue; + + if (overlapStart <= mapAddr && overlapEnd >= mapEnd) { + this.releaseSharedMapping(mapping); pidMap.delete(mapAddr); + continue; + } + + if (overlapStart <= mapAddr) { + const trim = overlapEnd - mapAddr; + const newAddr = overlapEnd; + const newLen = mapEnd - overlapEnd; + pidMap.delete(mapAddr); + if (newLen > 0) { + pidMap.set(newAddr, { + ...mapping, + fileOffset: mapping.fileOffset + trim, + len: newLen, + snapshot: mapping.snapshot.slice(trim), + }); + } else { + this.releaseSharedMapping(mapping); + } + continue; + } + + if (overlapEnd >= mapEnd) { + const newLen = overlapStart - mapAddr; + mapping.len = newLen; + mapping.snapshot = mapping.snapshot.slice(0, newLen); + continue; + } + + const leftLen = overlapStart - mapAddr; + const rightSkip = overlapEnd - mapAddr; + const rightLen = mapEnd - overlapEnd; + const rightAddr = overlapEnd; + const rightMapping: SharedMmapMapping = { + ...mapping, + fileOffset: mapping.fileOffset + rightSkip, + len: rightLen, + snapshot: mapping.snapshot.slice(rightSkip), + }; + mapping.len = leftLen; + mapping.snapshot = mapping.snapshot.slice(0, leftLen); + + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (backing) { + backing.refCount++; } + pidMap.set(rightAddr, rightMapping); } if (pidMap.size === 0) { @@ -8168,6 +8181,222 @@ export class CentralizedKernelWorker { } } + private releaseAllSharedMappingsForProcess(pid: number): void { + const registration = this.processes.get(pid); + if (registration) { + this.syncSharedMappingsFromProcess({ + pid, + memory: registration.memory, + channelOffset: 0, + i32View: new Int32Array(registration.memory.buffer, 0, 1), + consecutiveSyscalls: 0, + }); + } + + const pidMap = this.sharedMappings.get(pid); + if (!pidMap) return; + for (const mapping of pidMap.values()) { + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (backing) this.flushBackingRange(backing, mapping.fileOffset, mapping.len); + this.releaseSharedMapping(mapping); + } + this.sharedMappings.delete(pid); + } + + private inheritSharedMappings(parentPid: number, childPid: number): void { + const parentMap = this.sharedMappings.get(parentPid); + const childRegistration = this.processes.get(childPid); + if (!parentMap || parentMap.size === 0 || !childRegistration) return; + + const childMem = new Uint8Array(childRegistration.memory.buffer); + const childMap = new Map(); + for (const [mapAddr, mapping] of parentMap) { + const backing = this.sharedMmapBackings.get(mapping.backingKey); + if (!backing || mapAddr + mapping.len > childMem.length) continue; + backing.refCount++; + const snapshot = childMem.slice(mapAddr, mapAddr + mapping.len); + childMap.set(mapAddr, { + ...mapping, + snapshot, + version: backing.version, + }); + } + if (childMap.size > 0) { + this.sharedMappings.set(childPid, childMap); + } + } + + inheritProcessSharedMappings(parentPid: number, childPid: number): void { + this.inheritSharedMappings(parentPid, childPid); + this.inheritSysvShmMappings(parentPid, childPid); + } + + private setKernelCurrentPid(pid: number): void { + const setCurrentPid = this.kernelInstance!.exports.kernel_set_current_pid as + ((pid: number) => void) | undefined; + if (setCurrentPid) setCurrentPid(pid); + } + + private synchronizeSysvShmMappingsForSyscallBoundary(channel: ChannelInfo): void { + this.syncSysvShmMappingsFromProcess(channel); + this.refreshSysvShmMappingsToProcess(channel); + } + + private syncSysvShmMappingsFromProcess(channel: ChannelInfo): void { + const pidMap = this.shmMappings.get(channel.pid); + if (!pidMap || pidMap.size === 0) return; + const processMem = new Uint8Array(channel.memory.buffer); + this.setKernelCurrentPid(channel.pid); + + for (const [mapAddr, mapping] of pidMap) { + this.syncSysvShmMappingFromProcess(processMem, mapAddr, mapping); + } + } + + private syncSysvShmMappingFromProcess( + processMem: Uint8Array, + mapAddr: number, + mapping: SysvShmMapping, + ): void { + if (mapping.readOnly) return; + if (mapAddr + mapping.size > processMem.length) return; + + let changed = false; + for (let offset = 0; offset < mapping.size; offset += FILE_PAGE_SIZE) { + const n = Math.min(FILE_PAGE_SIZE, mapping.size - offset); + if (!this.rangeDiffersFromSnapshot( + processMem, + mapAddr + offset, + mapping.snapshot, + offset, + n, + )) { + continue; + } + + const bytes = processMem.subarray(mapAddr + offset, mapAddr + offset + n); + if (!this.writeSysvShmRange(mapping.segId, offset, bytes)) break; + mapping.snapshot.set(bytes, offset); + changed = true; + } + + if (changed) { + const version = (this.shmSegmentVersions.get(mapping.segId) ?? 0) + 1; + this.shmSegmentVersions.set(mapping.segId, version); + mapping.version = version; + } + } + + private refreshSysvShmMappingsToProcess(channel: ChannelInfo): void { + const pidMap = this.shmMappings.get(channel.pid); + if (!pidMap || pidMap.size === 0) return; + const processMem = new Uint8Array(channel.memory.buffer); + + for (const [mapAddr, mapping] of pidMap) { + const version = this.shmSegmentVersions.get(mapping.segId) ?? 0; + if (mapping.version === version) continue; + if (mapAddr + mapping.size > processMem.length) continue; + const latest = this.readSysvShmRange(mapping.segId, 0, mapping.size); + if (!latest) continue; + processMem.set(latest, mapAddr); + mapping.snapshot = latest; + mapping.version = version; + } + } + + private readSysvShmRange(segId: number, offset: number, len: number): Uint8Array | null { + const readChunk = this.kernelInstance!.exports.kernel_ipc_shm_read_chunk as + (shmid: number, offset: number, outPtr: KernelPointer, maxLen: number) => number; + const kernelMem = this.getKernelMem(); + const chunkPtr = this.scratchOffset + CH_DATA; + const out = new Uint8Array(len); + let transferred = 0; + + while (transferred < len) { + const toRead = Math.min(CH_DATA_SIZE, len - transferred); + const nRead = readChunk(segId, offset + transferred, this.toKernelPtr(chunkPtr), toRead); + if (nRead < 0) return null; + if (nRead === 0) break; + out.set(kernelMem.subarray(chunkPtr, chunkPtr + nRead), transferred); + transferred += nRead; + } + + return out; + } + + private writeSysvShmRange(segId: number, offset: number, bytes: Uint8Array): boolean { + const writeChunk = this.kernelInstance!.exports.kernel_ipc_shm_write_chunk as + (shmid: number, offset: number, dataPtr: KernelPointer, dataLen: number) => number; + const kernelMem = this.getKernelMem(); + const chunkPtr = this.scratchOffset + CH_DATA; + let transferred = 0; + + while (transferred < bytes.length) { + const toWrite = Math.min(CH_DATA_SIZE, bytes.length - transferred); + kernelMem.set(bytes.subarray(transferred, transferred + toWrite), chunkPtr); + const nWritten = writeChunk(segId, offset + transferred, this.toKernelPtr(chunkPtr), toWrite); + if (nWritten <= 0) return false; + transferred += nWritten; + } + + return true; + } + + private releaseAllSysvShmMappingsForProcess(pid: number): void { + const registration = this.processes.get(pid); + const pidMap = this.shmMappings.get(pid); + if (!pidMap || pidMap.size === 0) return; + + if (registration) { + this.syncSysvShmMappingsFromProcess({ + pid, + memory: registration.memory, + channelOffset: 0, + i32View: new Int32Array(registration.memory.buffer, 0, 1), + consecutiveSyscalls: 0, + }); + } + + this.setKernelCurrentPid(pid); + const kernelShmdt = this.kernelInstance!.exports.kernel_ipc_shmdt as + ((shmid: number) => number) | undefined; + if (kernelShmdt) { + for (const mapping of pidMap.values()) { + kernelShmdt(mapping.segId); + } + } + this.shmMappings.delete(pid); + } + + private inheritSysvShmMappings(parentPid: number, childPid: number): void { + const parentMap = this.shmMappings.get(parentPid); + const childRegistration = this.processes.get(childPid); + if (!parentMap || parentMap.size === 0 || !childRegistration) return; + + const childMem = new Uint8Array(childRegistration.memory.buffer); + const childMap = new Map(); + const kernelShmat = this.kernelInstance!.exports.kernel_ipc_shmat as + ((shmid: number, shmaddr: number, flags: number) => number) | undefined; + if (!kernelShmat) return; + + this.setKernelCurrentPid(childPid); + for (const [mapAddr, mapping] of parentMap) { + if (mapAddr + mapping.size > childMem.length) continue; + const flags = mapping.readOnly ? SHM_RDONLY : 0; + const sizeOrErr = kernelShmat(mapping.segId, mapAddr, flags); + if (sizeOrErr < 0) continue; + childMap.set(mapAddr, { + ...mapping, + snapshot: childMem.slice(mapAddr, mapAddr + mapping.size), + version: this.shmSegmentVersions.get(mapping.segId) ?? mapping.version, + }); + } + + if (childMap.size > 0) { + this.shmMappings.set(childPid, childMap); + } + } + /** Set the next child PID to allocate. */ setNextChildPid(pid: number): void { this.nextChildPid = pid; @@ -9125,7 +9354,6 @@ export class CentralizedKernelWorker { } } this.tcpConnections.delete(pid); - this.shmMappings.delete(pid); } // ========================================================================= @@ -9252,8 +9480,7 @@ export class CentralizedKernelWorker { const [shmid, _shmaddr, _flags] = args; // Set current pid for kernel_ipc_* exports - const setCurrentPid = this.kernelInstance!.exports.kernel_set_current_pid as ((pid: number) => void) | undefined; - if (setCurrentPid) setCurrentPid(channel.pid); + this.setKernelCurrentPid(channel.pid); const kernelShmat = this.kernelInstance!.exports.kernel_ipc_shmat as (shmid: number, shmaddr: number, flags: number) => number; const sizeOrErr = kernelShmat(shmid, _shmaddr, _flags); @@ -9265,11 +9492,13 @@ export class CentralizedKernelWorker { const size = sizeOrErr; // Synthesize mmap to allocate virtual address space for this pid + const readOnly = (_flags & SHM_RDONLY) !== 0; + const prot = readOnly ? 1 : 3; // PROT_READ, or PROT_READ|PROT_WRITE. const kernelView = new DataView(this.kernelMemory!.buffer, this.scratchOffset); kernelView.setUint32(CH_SYSCALL, SYS_MMAP, true); kernelView.setBigInt64(CH_ARGS + 0 * CH_ARG_SIZE, BigInt(0), true); // addr hint = NULL kernelView.setBigInt64(CH_ARGS + 1 * CH_ARG_SIZE, BigInt(size), true); // length - kernelView.setBigInt64(CH_ARGS + 2 * CH_ARG_SIZE, BigInt(3), true); // prot = PROT_READ|PROT_WRITE + kernelView.setBigInt64(CH_ARGS + 2 * CH_ARG_SIZE, BigInt(prot), true); // prot kernelView.setBigInt64(CH_ARGS + 3 * CH_ARG_SIZE, BigInt(0x22), true); // flags = MAP_PRIVATE|MAP_ANONYMOUS kernelView.setBigInt64(CH_ARGS + 4 * CH_ARG_SIZE, BigInt(-1), true); // fd = -1 kernelView.setBigInt64(CH_ARGS + 5 * CH_ARG_SIZE, BigInt(0), true); // offset = 0 @@ -9296,23 +9525,13 @@ export class CentralizedKernelWorker { } // Grow process memory to cover the allocated address - this.ensureProcessMemoryCovers(channel.pid, channel.memory, SYS_MMAP, addr, [0, size, 3, 0x22, -1, 0]); + this.ensureProcessMemoryCovers(channel.pid, channel.memory, SYS_MMAP, addr, [0, size, prot, 0x22, -1, 0]); // Transfer segment data from kernel to process memory via read_chunk - const readChunk = this.kernelInstance!.exports.kernel_ipc_shm_read_chunk as - (shmid: number, offset: number, outPtr: KernelPointer, maxLen: number) => number; const processMem = new Uint8Array(channel.memory.buffer); - const kernelMem = this.getKernelMem(); - const chunkSize = CH_DATA_SIZE; - const chunkPtr = this.scratchOffset + CH_DATA; - let transferred = 0; - while (transferred < size) { - const remaining = size - transferred; - const toRead = Math.min(remaining, chunkSize); - const nRead = readChunk(shmid, transferred, this.toKernelPtr(chunkPtr), toRead); - if (nRead <= 0) break; - processMem.set(kernelMem.subarray(chunkPtr, chunkPtr + nRead), (addr >>> 0) + transferred); - transferred += nRead; + const snapshot = this.readSysvShmRange(shmid, 0, size); + if (snapshot) { + processMem.set(snapshot, addr >>> 0); } // Track the mapping for shmdt @@ -9321,7 +9540,14 @@ export class CentralizedKernelWorker { pidMappings = new Map(); this.shmMappings.set(channel.pid, pidMappings); } - pidMappings.set(addr >>> 0, { segId: shmid, size }); + const mapAddr = addr >>> 0; + pidMappings.set(mapAddr, { + segId: shmid, + size, + readOnly, + snapshot: snapshot ?? processMem.slice(mapAddr, mapAddr + size), + version: this.shmSegmentVersions.get(shmid) ?? 0, + }); this.completeChannelRaw(channel, addr, 0); this.relistenChannel(channel); @@ -9344,25 +9570,12 @@ export class CentralizedKernelWorker { } // Set current pid for kernel exports - const setCurrentPid = this.kernelInstance!.exports.kernel_set_current_pid as ((pid: number) => void) | undefined; - if (setCurrentPid) setCurrentPid(channel.pid); + this.setKernelCurrentPid(channel.pid); - // Sync process memory back to kernel segment via write_chunk - const writeChunk = this.kernelInstance!.exports.kernel_ipc_shm_write_chunk as - (shmid: number, offset: number, dataPtr: KernelPointer, dataLen: number) => number; + // Sync only dirty writable mappings. A read-only attachment must never + // overwrite newer segment contents on detach. const processMem = new Uint8Array(channel.memory.buffer); - const kernelMem = this.getKernelMem(); - const chunkSize = CH_DATA_SIZE; - const chunkPtr = this.scratchOffset + CH_DATA; - let transferred = 0; - while (transferred < mapping.size) { - const remaining = mapping.size - transferred; - const toWrite = Math.min(remaining, chunkSize); - kernelMem.set(processMem.subarray(addr + transferred, addr + transferred + toWrite), chunkPtr); - const nWritten = writeChunk(mapping.segId, transferred, this.toKernelPtr(chunkPtr), toWrite); - if (nWritten <= 0) break; - transferred += nWritten; - } + this.syncSysvShmMappingFromProcess(processMem, addr, mapping); // Kernel-side detach bookkeeping const kernelShmdt = this.kernelInstance!.exports.kernel_ipc_shmdt as (shmid: number) => number; diff --git a/host/src/node-kernel-worker-entry.ts b/host/src/node-kernel-worker-entry.ts index cadcf5ab4..4bd3c824f 100644 --- a/host/src/node-kernel-worker-entry.ts +++ b/host/src/node-kernel-worker-entry.ts @@ -15,6 +15,7 @@ */ import { parentPort } from "node:worker_threads"; import { readFileSync, existsSync, mkdtempSync, rmSync } from "node:fs"; +import { createHash } from "node:crypto"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { fileURLToPath } from "node:url"; @@ -105,6 +106,8 @@ interface ProcessInfo { const processes = new Map(); const processTeardowns = new Map>(); const reportedExits = new Set(); +const compiledProgramModules = new Map>(); +const MAX_COMPILED_PROGRAM_MODULES = 16; // Workers terminated by the kernel-worker entry itself (handleExit / // handleExec / handleTerminate). The crash safety-net listener checks @@ -356,6 +359,28 @@ function bufferToArrayBuffer(bytes: Uint8Array): ArrayBuffer { return out; } +function programModuleCacheKey(programBytes: ArrayBuffer): string { + const bytes = new Uint8Array(programBytes); + return `${bytes.byteLength}:${createHash("sha256").update(bytes).digest("hex")}`; +} + +async function getCompiledProgramModule( + programBytes: ArrayBuffer, +): Promise { + const key = programModuleCacheKey(programBytes); + let promise = compiledProgramModules.get(key); + if (!promise) { + promise = WebAssembly.compile(programBytes); + compiledProgramModules.set(key, promise); + promise.catch(() => compiledProgramModules.delete(key)); + if (compiledProgramModules.size > MAX_COMPILED_PROGRAM_MODULES) { + const oldest = compiledProgramModules.keys().next().value; + if (oldest) compiledProgramModules.delete(oldest); + } + } + return promise; +} + function resolveExecLocal(path: string): ArrayBuffer | null { const mapped = execPrograms[path]; if (mapped && existsSync(mapped)) { @@ -755,6 +780,7 @@ async function handleFork( maxAddr: childLayout.maxAddr, mmapBase: childLayout.mmapBase, }); + kernelWorker.inheritProcessSharedMappings(parentPid, childPid); const FORK_BUF_SIZE = FORK_SAVE_BUFFER_SIZE; const forkBufAddr = threadFork diff --git a/host/src/platform/node.ts b/host/src/platform/node.ts index 8bf55dce2..58a133272 100644 --- a/host/src/platform/node.ts +++ b/host/src/platform/node.ts @@ -13,6 +13,15 @@ import type { PlatformIO, StatResult, StatfsResult } from "../types"; import { nativeStatfs, translateOpenFlags } from "../vfs/host-fs"; import { NativeMetadataOverlay } from "./native-metadata"; +const UTIME_NOW = 0x3fffffff; +const UTIME_OMIT = 0x3ffffffe; + +function makeFsError(code: string, message: string): Error & { code: string } { + const err = new Error(`${code}: ${message}`) as Error & { code: string }; + err.code = code; + return err; +} + export class NodePlatformIO implements PlatformIO { private dirHandles = new Map(); private nextDirHandle = 1; diff --git a/host/test/centralized-test-helper.ts b/host/test/centralized-test-helper.ts index ac371ef9f..4665d9f23 100644 --- a/host/test/centralized-test-helper.ts +++ b/host/test/centralized-test-helper.ts @@ -328,6 +328,7 @@ async function runOnMainThread(options: RunProgramOptions): Promise { it("returns a virtual MAC address via ioctl", async () => { const result = await runCentralizedProgram({ programPath: resolveBinary("programs/ifhwaddr.wasm"), + useDefaultRootfs: false, timeout: 10_000, }); diff --git a/host/test/multi-worker.test.ts b/host/test/multi-worker.test.ts index 618cb0e3d..eb2fc53ab 100644 --- a/host/test/multi-worker.test.ts +++ b/host/test/multi-worker.test.ts @@ -81,6 +81,9 @@ describe("CentralizedKernelWorker Process Management", () => { cleanupPendingSelectRetries: vi.fn(), cleanupUdpBindings: vi.fn(), cleanupTcpListeners: vi.fn(), + sharedMappings: new Map(), + sharedMmapBackings: new Map(), + shmMappings: new Map(), hostReaped: new Set([pid]), }) as CentralizedKernelWorker; diff --git a/host/test/vfs.test.ts b/host/test/vfs.test.ts index 12288acde..a24d95a16 100644 --- a/host/test/vfs.test.ts +++ b/host/test/vfs.test.ts @@ -1,5 +1,5 @@ -import { describe, it, expect, beforeEach, afterEach } from "vitest"; -import { mkdtempSync, writeFileSync, readFileSync, rmSync } from "node:fs"; +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; +import { mkdirSync, mkdtempSync, writeFileSync, readFileSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { VirtualPlatformIO } from "../src/vfs/vfs"; @@ -356,8 +356,14 @@ describe("HostFileSystem path traversal", () => { }); it("rejects paths with embedded .. sequences", () => { - const hfs = new HostFileSystem("/tmp/sandbox"); - expect(() => hfs.stat("/subdir/../../etc/passwd")).toThrow("EACCES"); + const root = mkdtempSync(join(tmpdir(), "kandelo-vfs-")); + try { + mkdirSync(join(root, "subdir")); + const hfs = new HostFileSystem(root); + expect(() => hfs.stat("/subdir/../../etc/passwd")).toThrow("EACCES"); + } finally { + rmSync(root, { recursive: true, force: true }); + } }); }); diff --git a/libc/glue/abi_constants.h b/libc/glue/abi_constants.h index 690d0b988..8fb227203 100644 --- a/libc/glue/abi_constants.h +++ b/libc/glue/abi_constants.h @@ -4,7 +4,7 @@ #define WASM_POSIX_ABI_CONSTANTS_H /* Mirrors wasm_posix_shared::ABI_VERSION. */ -#define WASM_POSIX_ABI_VERSION 15u +#define WASM_POSIX_ABI_VERSION 16u /* Default process-wasm pthread slot declaration. */ #define WASM_POSIX_THREAD_SLOT_DECL_DEFAULT -1 From 9a7ec18465cc62555074127c76c2facf64d905d4 Mon Sep 17 00:00:00 2001 From: Brandon Payton Date: Sat, 20 Jun 2026 02:11:58 -0400 Subject: [PATCH 8/8] host: avoid single-observer shared-memory sync churn --- host/src/kernel-worker.ts | 69 +++++++++++++++++-- host/test/mmap-shared.test.ts | 81 ++++++++++++++++++++++ host/test/multi-worker.test.ts | 5 ++ host/test/sysv-ipc.test.ts | 118 ++++++++++++++++++++++++++++++++- 4 files changed, 268 insertions(+), 5 deletions(-) diff --git a/host/src/kernel-worker.ts b/host/src/kernel-worker.ts index 134ea095e..92f447155 100644 --- a/host/src/kernel-worker.ts +++ b/host/src/kernel-worker.ts @@ -5951,6 +5951,13 @@ export class CentralizedKernelWorker { } // Call the async fork handler to spawn child Worker + // A process may be the only current observer of a large shared-memory + // backing before fork (OPcache's arena is the common case). Normal syscall + // boundaries can skip single-observer publishes, but fork creates another + // observer and SysV segments may later be observed by new shmat callers. + this.syncSharedMappingsFromProcess(channel, true, { force: true }); + this.syncSysvShmMappingsFromProcess(channel, { force: true }); + this.callbacks.onFork(parentPid, childPid, channel.memory, threadFork).then((childChannelOffsets) => { if (!this.processes.has(parentPid)) return; @@ -7639,7 +7646,11 @@ export class CentralizedKernelWorker { return true; } - private syncSharedMappingsFromProcess(channel: ChannelInfo, includeAnonymous = true): void { + private syncSharedMappingsFromProcess( + channel: ChannelInfo, + includeAnonymous = true, + options: { force?: boolean } = {}, + ): void { const pidMap = this.sharedMappings.get(channel.pid); if (!pidMap || pidMap.size === 0) return; const processMem = new Uint8Array(channel.memory.buffer); @@ -7649,6 +7660,14 @@ export class CentralizedKernelWorker { const backing = this.sharedMmapBackings.get(mapping.backingKey); if (!backing) continue; if (backing.anonymous && !includeAnonymous) continue; + if ( + !options.force + && backing.anonymous + && backing.refCount <= 1 + && mapping.version === backing.version + ) { + continue; + } if (mapAddr + mapping.len > processMem.length) continue; let changed = false; @@ -8190,7 +8209,7 @@ export class CentralizedKernelWorker { channelOffset: 0, i32View: new Int32Array(registration.memory.buffer, 0, 1), consecutiveSyscalls: 0, - }); + }, true, { force: true }); } const pidMap = this.sharedMappings.get(pid); @@ -8242,17 +8261,53 @@ export class CentralizedKernelWorker { this.refreshSysvShmMappingsToProcess(channel); } - private syncSysvShmMappingsFromProcess(channel: ChannelInfo): void { + private syncSysvShmMappingsFromProcess( + channel: ChannelInfo, + options: { force?: boolean } = {}, + ): void { const pidMap = this.shmMappings.get(channel.pid); if (!pidMap || pidMap.size === 0) return; const processMem = new Uint8Array(channel.memory.buffer); this.setKernelCurrentPid(channel.pid); for (const [mapAddr, mapping] of pidMap) { + const segmentVersion = this.shmSegmentVersions.get(mapping.segId) ?? 0; + if ( + !options.force + && mapping.version === segmentVersion + && !this.hasPeerSysvShmMapping(channel.pid, mapAddr, mapping.segId) + ) { + continue; + } this.syncSysvShmMappingFromProcess(processMem, mapAddr, mapping); } } + private hasPeerSysvShmMapping(pid: number, mapAddr: number, segId: number): boolean { + for (const [otherPid, pidMap] of this.shmMappings) { + for (const [otherAddr, otherMapping] of pidMap) { + if (otherMapping.segId !== segId) continue; + if (otherPid === pid && otherAddr === mapAddr) continue; + return true; + } + } + return false; + } + + private syncSysvShmSegmentFromMappedProcesses(segId: number): void { + for (const [pid, pidMap] of this.shmMappings) { + const registration = this.processes.get(pid); + if (!registration) continue; + const processMem = new Uint8Array(registration.memory.buffer); + this.setKernelCurrentPid(pid); + for (const [mapAddr, mapping] of pidMap) { + if (mapping.segId === segId) { + this.syncSysvShmMappingFromProcess(processMem, mapAddr, mapping); + } + } + } + } + private syncSysvShmMappingFromProcess( processMem: Uint8Array, mapAddr: number, @@ -8354,7 +8409,7 @@ export class CentralizedKernelWorker { channelOffset: 0, i32View: new Int32Array(registration.memory.buffer, 0, 1), consecutiveSyscalls: 0, - }); + }, { force: true }); } this.setKernelCurrentPid(pid); @@ -9482,6 +9537,12 @@ export class CentralizedKernelWorker { // Set current pid for kernel_ipc_* exports this.setKernelCurrentPid(channel.pid); + // If this segment already has a host-side attachment, publish any + // single-observer writes before the new attachment reads its initial + // bytes from the kernel's segment backing. + this.syncSysvShmSegmentFromMappedProcesses(shmid); + this.setKernelCurrentPid(channel.pid); + const kernelShmat = this.kernelInstance!.exports.kernel_ipc_shmat as (shmid: number, shmaddr: number, flags: number) => number; const sizeOrErr = kernelShmat(shmid, _shmaddr, _flags); if (sizeOrErr < 0) { diff --git a/host/test/mmap-shared.test.ts b/host/test/mmap-shared.test.ts index e0aa64976..d1b338821 100644 --- a/host/test/mmap-shared.test.ts +++ b/host/test/mmap-shared.test.ts @@ -5,6 +5,7 @@ import { fileURLToPath } from "node:url"; import { runCentralizedProgram } from "./centralized-test-helper"; import { resolveBinary } from "../src/binary-resolver"; import { NodePlatformIO } from "../src/platform/node"; +import { CentralizedKernelWorker } from "../src/kernel-worker"; const __dirname = dirname(fileURLToPath(import.meta.url)); const repoRoot = join(__dirname, "../.."); @@ -17,6 +18,86 @@ const itIfAnonymousForkFixture = existsSync(anonymousForkFixture) ? it : it.skip const itIfMunmapReuseFixture = existsSync(munmapReuseFixture) ? it : it.skip; const itIfLargePwriteFixture = existsSync(largePwriteFixture) ? it : it.skip; +function createAnonymousSharedMmapHarness(refCount: number) { + const pid = 211; + const mapAddr = 0x3000; + const len = 4096; + const backingKey = "anon:test"; + const memory = new WebAssembly.Memory({ initial: 1, maximum: 1, shared: true }); + const backing = { + key: backingKey, + path: "", + handle: -1, + anonymous: true, + writable: true, + pages: new Map([[0, new Uint8Array(len)]]), + dirtyPages: new Set(), + refCount, + version: 0, + }; + const mapping = { + fd: -1, + fileOffset: 0, + len, + writable: true, + backingKey, + snapshot: new Uint8Array(len), + version: 0, + }; + const kw = Object.assign(Object.create(CentralizedKernelWorker.prototype), { + sharedMappings: new Map([[pid, new Map([[mapAddr, mapping]])]]), + sharedMmapBackings: new Map([[backingKey, backing]]), + }) as CentralizedKernelWorker; + const channel = { + pid, + memory, + channelOffset: 0, + i32View: new Int32Array(memory.buffer, 0, 1), + consecutiveSyscalls: 0, + }; + return { + backing, + channel, + kw, + mapAddr, + mapping, + processMem: new Uint8Array(memory.buffer), + }; +} + +describe("anonymous MAP_SHARED host synchronization", () => { + it("skips single-observer boundary publishes and publishes on forced handoff", () => { + const { backing, channel, kw, mapAddr, mapping, processMem } = + createAnonymousSharedMmapHarness(1); + + processMem[mapAddr + 23] = 0x4d; + (kw as any).syncSharedMappingsFromProcess(channel, true); + + expect(backing.version).toBe(0); + expect(backing.dirtyPages.size).toBe(0); + expect(mapping.snapshot[23]).toBe(0); + + (kw as any).syncSharedMappingsFromProcess(channel, true, { force: true }); + + expect(backing.version).toBe(1); + expect(backing.dirtyPages.has(0)).toBe(true); + expect(mapping.snapshot[23]).toBe(0x4d); + expect(backing.pages.get(0)![23]).toBe(0x4d); + }); + + it("publishes ordinary boundaries when another mapping observes the backing", () => { + const { backing, channel, kw, mapAddr, mapping, processMem } = + createAnonymousSharedMmapHarness(2); + + processMem[mapAddr + 31] = 0x91; + (kw as any).syncSharedMappingsFromProcess(channel, true); + + expect(backing.version).toBe(1); + expect(mapping.snapshot[31]).toBe(0x91); + expect(backing.pages.get(0)![31]).toBe(0x91); + }); +}); + describe("MAP_SHARED mmap + msync", () => { it("writes through MAP_SHARED mapping and flushes with msync", async () => { const result = await runCentralizedProgram({ diff --git a/host/test/multi-worker.test.ts b/host/test/multi-worker.test.ts index eb2fc53ab..bb604a004 100644 --- a/host/test/multi-worker.test.ts +++ b/host/test/multi-worker.test.ts @@ -402,6 +402,10 @@ describe("CentralizedKernelWorker Process Management", () => { threadForkContexts: new Map(), tcpListenerTargets: new Map(), epollInterests: new Map(), + sharedMappings: new Map(), + sharedMmapBackings: new Map(), + shmMappings: new Map(), + shmSegmentVersions: new Map(), inheritSharedMappings: vi.fn(), completeChannel, kernelInstance: { @@ -409,6 +413,7 @@ describe("CentralizedKernelWorker Process Management", () => { kernel_fork_process: kernelForkProcess, kernel_clear_fork_child: vi.fn(() => 0), kernel_reset_signal_mask: vi.fn(() => 0), + kernel_set_current_pid: vi.fn(), }, }, }); diff --git a/host/test/sysv-ipc.test.ts b/host/test/sysv-ipc.test.ts index 5ad55c9e1..93a178622 100644 --- a/host/test/sysv-ipc.test.ts +++ b/host/test/sysv-ipc.test.ts @@ -2,16 +2,132 @@ * Tests for SysV IPC: message queues, semaphores, and shared memory. * Verifies that the SharedIpcTable is properly wired up in the kernel worker. */ -import { describe, it, expect } from "vitest"; +import { describe, it, expect, vi } from "vitest"; import { join, dirname } from "node:path"; import { existsSync } from "node:fs"; import { fileURLToPath } from "node:url"; import { runCentralizedProgram } from "./centralized-test-helper"; +import { CentralizedKernelWorker } from "../src/kernel-worker"; const __dirname = dirname(fileURLToPath(import.meta.url)); const ipcBinary = join(__dirname, "../../examples/sysv_ipc_test.wasm"); const hasBinary = existsSync(ipcBinary); +function createSysvSyncHarness() { + const pid = 101; + const segId = 7; + const mapAddr = 0x2000; + const size = 4096; + const processMemory = new WebAssembly.Memory({ initial: 1, maximum: 1, shared: true }); + const kernelMemory = new WebAssembly.Memory({ initial: 2 }); + const backing = new Uint8Array(size); + const writes: Array<{ segId: number; offset: number; bytes: Uint8Array }> = []; + + const writeChunk = vi.fn((shmid: number, offset: number, dataPtr: number, dataLen: number) => { + const kernelMem = new Uint8Array(kernelMemory.buffer); + const bytes = kernelMem.slice(dataPtr, dataPtr + dataLen); + backing.set(bytes, offset); + writes.push({ segId: shmid, offset, bytes }); + return dataLen; + }); + + const readChunk = vi.fn((shmid: number, offset: number, outPtr: number, maxLen: number) => { + expect(shmid).toBe(segId); + const len = Math.min(maxLen, backing.length - offset); + new Uint8Array(kernelMemory.buffer).set(backing.subarray(offset, offset + len), outPtr); + return len; + }); + + const kw = Object.assign(Object.create(CentralizedKernelWorker.prototype), { + kernel: { toKernelPtr: (value: number | bigint) => Number(value) }, + kernelMemory, + kernelInstance: { + exports: { + kernel_set_current_pid: vi.fn(), + kernel_ipc_shm_write_chunk: writeChunk, + kernel_ipc_shm_read_chunk: readChunk, + kernel_ipc_shmdt: vi.fn(() => 0), + }, + }, + processes: new Map([ + [pid, { memory: processMemory, ptrWidth: 4 }], + ]), + shmMappings: new Map([ + [ + pid, + new Map([ + [ + mapAddr, + { + segId, + size, + readOnly: false, + snapshot: new Uint8Array(size), + version: 0, + }, + ], + ]), + ], + ]), + shmSegmentVersions: new Map([[segId, 0]]), + scratchOffset: 0, + }) as CentralizedKernelWorker; + + const channel = { + pid, + memory: processMemory, + channelOffset: 0, + i32View: new Int32Array(processMemory.buffer, 0, 1), + consecutiveSyscalls: 0, + }; + + return { + pid, + segId, + mapAddr, + processMemory, + processMem: new Uint8Array(processMemory.buffer), + kw, + channel, + writes, + writeChunk, + }; +} + +describe("SysV shared-memory host synchronization", () => { + it("skips single-observer syscall-boundary publishes but forces observer handoff publishes", () => { + const { + pid, + segId, + mapAddr, + processMem, + kw, + channel, + writes, + writeChunk, + } = createSysvSyncHarness(); + + processMem[mapAddr + 17] = 0x7b; + + (kw as any).synchronizeSysvShmMappingsForSyscallBoundary(channel); + expect(writeChunk).not.toHaveBeenCalled(); + + (kw as any).syncSysvShmSegmentFromMappedProcesses(segId); + expect(writes).toHaveLength(1); + expect(writes[0]!.segId).toBe(segId); + expect(writes[0]!.offset).toBe(0); + expect(writes[0]!.bytes[17]).toBe(0x7b); + + writes.length = 0; + writeChunk.mockClear(); + processMem[mapAddr + 41] = 0xa5; + + (kw as any).releaseAllSysvShmMappingsForProcess(pid); + expect(writeChunk).toHaveBeenCalledTimes(1); + expect(writes[0]!.bytes[41]).toBe(0xa5); + }); +}); + describe.skipIf(!hasBinary)("SysV IPC", () => { it("message queues, semaphores, shared memory", async () => { const result = await runCentralizedProgram({