From d010e2075f096cd91d3216d07e7db792fd5297cb Mon Sep 17 00:00:00 2001 From: Andy Postnikov Date: Fri, 8 May 2026 12:49:26 +0200 Subject: [PATCH 1/2] fix(security): bounds-check untrusted shmem offsets in port and libunit Audit-driven hardening pass clustered around the trust boundary where one process writes a struct into shared memory and another process reads it. Five findings (security-audit.md V5/V10), one helper per file, no public-API or libunit-ABI changes. V5 [High] chunk_id no bounds check (src/nxt_port_memory.c:698 on master @ 7b126961). Peer-supplied chunk_id was passed to nxt_port_mmap_chunk_start() without checking against PORT_MMAP_CHUNK_COUNT, yielding an OOB pointer. Reject in nxt_port_mmap_get_incoming_buf() before pointer arithmetic. V5 [High] chunk_id + nchunks past mapped region (line 701). nchunks is computed from untrusted mmap_msg->size; a peer could land b->mem.end past the mapped segment. Combined with #1 above into a single overflow-safe range check (chunk_id + nchunks <= PORT_MMAP_CHUNK_COUNT, written as nchunks <= PORT_MMAP_CHUNK_COUNT - chunk_id after the chunk_id bound holds). V5 [Medium] TOCTOU on shmem mmap_id lookup (lines 676-678). process->incoming.mutex was released between locating mmap_handler and the caller's first dereference of mmap_handler->hdr; a concurrent peer-side close could free the handler in that window. nxt_port_get_port_incoming_mmap now bumps the existing atomic refcount under the mutex; the sole caller adopts the reference (the redundant explicit handler_use(+1) at the buf-parent assignment site is removed) and releases it on every error path. V10 [Medium] max_fields_count * sizeof(field) + 2 overflow on response (src/nxt_unit.c:2049-2051, 2128-2130). Application-supplied counts/sizes were multiplied without overflow checking; wrap-around produced an undersized buffer that subsequent field memcpy()s overran. New file-static helper nxt_unit_response_buf_size() computes the size with UINT32_MAX-bounded checks at both multiplication and addition; nxt_unit_response_init/realloc reject overflow with a libunit error. V10 [Low] sptr offset dereferenced without bounds (line 1329 etc.). Every nxt_unit_request_t sptr is now validated at request-arrival time inside nxt_unit_process_req_headers() before any consumer dereferences it. New file-static helper nxt_unit_sptr_in_buf(sptr, length, buf, size) performs the underflow-safe check; one-shot validation keeps the per-deref hot paths and the libunit ABI unchanged. Cross-cutting: file-static helpers nxt_port_mmap_chunk_range_valid() in src/nxt_port_memory.c and nxt_unit_sptr_in_buf() / nxt_unit_response_buf_size() in src/nxt_unit.c. None exported through public headers; nxt_unit_sptr.h, nxt_unit_request.h, nxt_unit_field.h, and nxt_unit.h struct layouts are unchanged. Builds clean with ./configure --openssl && make under -Werror. nxt_unit.c also compile-checked standalone with the stock CFLAGS. pytest-3 --collect-only test/ collects 898 tests; root-required runs were not exercised in the worktree. Co-Authored-By: Claude Opus 4.7 --- CHANGES | 11 +++ src/nxt_port_memory.c | 60 +++++++++++++++-- src/nxt_unit.c | 153 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 204 insertions(+), 20 deletions(-) diff --git a/CHANGES b/CHANGES index 8ae835854..416ce003e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,17 @@ Changes with FreeUnit 1.35.4 xx xxx 2026 + *) Security: bounds-check untrusted shared-memory offsets on the port + IPC and libunit deserialization paths (audit PR-B): reject + out-of-range chunk_id and chunk_id+nchunks in mmap messages + (nxt_port_mmap_get_incoming_buf), close a TOCTOU window between + incoming-mmap lookup and first hdr dereference by retaining the + handler refcount across the unlock, reject integer overflow in + the application-supplied (max_fields_count, max_fields_size) + response-buffer formula in nxt_unit_response_init/realloc, and + validate every nxt_unit_request_t sptr at request arrival before + any libunit consumer dereferences it. + *) Bugfix: fix router process CPU spin and connection hang under port scanning load; CLOSE-WAIT sockets are now cleaned up properly on client FIN, idle connection queue iteration fixed, systemd file diff --git a/src/nxt_port_memory.c b/src/nxt_port_memory.c index ad6e97ade..3dbcf0039 100644 --- a/src/nxt_port_memory.c +++ b/src/nxt_port_memory.c @@ -531,6 +531,16 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) if (nxt_fast_path(process->incoming.size > id)) { mmap_handler = process->incoming.elts[id].mmap_handler; + /* + * Bump refcount under the mutex so the handler cannot be unmapped + * by a concurrent peer-side close between this lookup and the + * caller's first dereference of mmap_handler->hdr. The caller + * adopts this reference and is responsible for releasing it. + */ + if (mmap_handler != NULL) { + nxt_port_mmap_handler_use(mmap_handler, 1); + } + } else { mmap_handler = NULL; @@ -543,6 +553,26 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) } +/* + * Validate that a peer-supplied (chunk_id, nchunks) pair refers to a + * region wholly inside the mapped data area. Returns non-zero on + * success. Underflow-safe: subtracts on the constant side. + */ +nxt_inline nxt_bool_t +nxt_port_mmap_chunk_range_valid(nxt_chunk_id_t chunk_id, size_t nchunks) +{ + if (chunk_id >= PORT_MMAP_CHUNK_COUNT) { + return 0; + } + + if (nchunks > (size_t) PORT_MMAP_CHUNK_COUNT - chunk_id) { + return 0; + } + + return 1; +} + + nxt_buf_t * nxt_port_mmap_get_buf(nxt_task_t *task, nxt_port_mmaps_t *mmaps, size_t size) { @@ -679,8 +709,31 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, return NULL; } + /* + * mmap_msg fields originate from a peer process; reject offsets + * that would point outside the mapped data area before they reach + * pointer arithmetic below. See security-audit.md V5. + */ + nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; + if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { + nchunks++; + } + + if (nxt_slow_path(!nxt_port_mmap_chunk_range_valid(mmap_msg->chunk_id, + nchunks))) + { + nxt_alert(task, "invalid mmap message from pid %PI: " + "chunk_id %uD, size %uD (chunks %uz, max %d)", + spid, mmap_msg->chunk_id, mmap_msg->size, + nchunks, PORT_MMAP_CHUNK_COUNT); + + nxt_port_mmap_handler_use(mmap_handler, -1); + return NULL; + } + b = nxt_buf_mem_ts_alloc(task, port->mem_pool, 0); if (nxt_slow_path(b == NULL)) { + nxt_port_mmap_handler_use(mmap_handler, -1); return NULL; } @@ -688,11 +741,6 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, nxt_buf_set_port_mmap(b); - nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; - if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { - nchunks++; - } - hdr = mmap_handler->hdr; b->mem.start = nxt_port_mmap_chunk_start(hdr, mmap_msg->chunk_id); @@ -701,7 +749,7 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, b->mem.end = b->mem.start + nchunks * PORT_MMAP_CHUNK_SIZE; b->parent = mmap_handler; - nxt_port_mmap_handler_use(mmap_handler, 1); + /* Adopts the reference taken by nxt_port_get_port_incoming_mmap(). */ nxt_debug(task, "incoming mmap buf allocation: %p [%p,%uz] %PI->%PI,%d,%d", b, b->mem.start, b->mem.end - b->mem.start, diff --git a/src/nxt_unit.c b/src/nxt_unit.c index 7d523beb8..bfb187ef1 100644 --- a/src/nxt_unit.c +++ b/src/nxt_unit.c @@ -203,6 +203,87 @@ static void nxt_unit_lvlhsh_free(void *data, void *p); static int nxt_unit_memcasecmp(const void *p1, const void *p2, size_t length); +/* + * Compute the response buffer size for a given (max_fields_count, + * max_fields_size) pair, rejecting integer overflow. Both inputs come + * from the application; an overflow here would yield an undersized + * allocation that subsequent field memcpy()s overrun. See + * security-audit.md V10. + */ +static int +nxt_unit_response_buf_size(uint32_t max_fields_count, + uint32_t max_fields_size, uint32_t *buf_size) +{ + /* + * Each field name and value is 0-terminated by libunit, hence + * the '+ 2' per field (matches the historical formula). + */ + uint32_t total; + + if (max_fields_count + > (UINT32_MAX - (uint32_t) sizeof(nxt_unit_response_t)) + / (uint32_t) (sizeof(nxt_unit_field_t) + 2)) + { + return NXT_UNIT_ERROR; + } + + total = (uint32_t) sizeof(nxt_unit_response_t) + + max_fields_count * (uint32_t) (sizeof(nxt_unit_field_t) + 2); + + if (max_fields_size > UINT32_MAX - total) { + return NXT_UNIT_ERROR; + } + + *buf_size = total + max_fields_size; + + return NXT_UNIT_OK; +} + + +/* + * Validate that an sptr field within a peer-supplied buffer dereferences + * to a [length]-byte range that is wholly inside the buffer. Used at + * request-arrival time to vet every sptr in nxt_unit_request_t before + * the application sees it. See security-audit.md V10. + * + * sptr->base aliases the address of the sptr itself (the union encodes + * an offset relative to that location), so this also implicitly checks + * that the sptr is inside the buffer. + */ +static int +nxt_unit_sptr_in_buf(nxt_unit_sptr_t *sptr, uint32_t length, + void *buf_start, uint32_t buf_size) +{ + size_t sptr_off, end_off; + + if ((uint8_t *) sptr < (uint8_t *) buf_start) { + return 0; + } + + sptr_off = (uint8_t *) sptr - (uint8_t *) buf_start; + /* + * Underflow-safe: subtract on the constant side throughout. The + * sptr struct itself must fit inside the buffer before we + * dereference sptr->offset, so check sizeof(*sptr) here rather + * than just sptr_off > buf_size. + */ + if (sptr_off > buf_size - sizeof(nxt_unit_sptr_t)) { + return 0; + } + + if (sptr->offset > buf_size - sptr_off) { + return 0; + } + + end_off = sptr_off + sptr->offset; + if (length > buf_size - end_off) { + return 0; + } + + return 1; +} + + struct nxt_unit_mmap_buf_s { nxt_unit_buf_t buf; @@ -1303,6 +1384,46 @@ nxt_unit_process_req_headers(nxt_unit_ctx_t *ctx, nxt_unit_recv_msg_t *recv_msg, return NXT_UNIT_ERROR; } + /* + * Validate every sptr in the request struct before any code path + * dereferences it. Offsets originate from the router (a more + * privileged peer) but the libunit ABI is also reachable from + * attacker-influenced input shapes; keeping the validation + * co-located with arrival makes the trust boundary explicit. + * See security-audit.md V10 (sptr offset). + */ + { + nxt_unit_request_t *vr = recv_msg->start; + uint32_t vsize = recv_msg->size; + + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&vr->method, vr->method_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->version, vr->version_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->remote, vr->remote_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_addr, vr->local_addr_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_port, vr->local_port_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->server_name, vr->server_name_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->target, vr->target_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->path, vr->path_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->query, vr->query_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->preread_content, 0, + recv_msg->start, vsize))) + { + nxt_unit_warn(ctx, "#%"PRIu32": malformed request: " + "sptr out of buffer", recv_msg->stream); + return NXT_UNIT_ERROR; + } + } + req_impl = nxt_unit_request_info_get(ctx); if (nxt_slow_path(req_impl == NULL)) { nxt_unit_warn(ctx, "#%"PRIu32": request info allocation failed", @@ -2042,13 +2163,15 @@ nxt_unit_response_init(nxt_unit_request_info_t *req, nxt_unit_req_debug(req, "duplicate response init"); } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "init: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } if (nxt_slow_path(req->response_buf != NULL)) { buf = req->response_buf; @@ -2121,13 +2244,15 @@ nxt_unit_response_realloc(nxt_unit_request_info_t *req, return NXT_UNIT_ERROR; } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "realloc: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } nxt_unit_req_debug(req, "realloc %"PRIu32"", buf_size); From c99f5fbfe36e50e6298a49b60bad435394fa5606 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 12 Jun 2026 15:47:12 +0000 Subject: [PATCH 2/2] fix(security): close two coverage gaps in shmem bounds pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups that sit squarely inside this PR's named scope — "bounds-check untrusted shmem offsets in port and libunit" — surfaced during integration audit on the post-merge codebase. 1. nxt_port_mmap_at() index cap (src/nxt_port_memory.c:47) When called from the receive path, the i parameter is hdr->id of a peer-mapped shared-memory region. A forged value of 0xFFFFFFFF wraps (i + 1) to 0 in both the initial cap assignment and the growth loop condition, skips realloc, and returns port_mmaps->elts + 0xFFFFFFFF — an out-of-array pointer the caller then writes through (nxt_port_get_port_incoming_mmap stores mmap_handler at that offset). Worker-to-main wild write at root. Cap i against a generous ceiling (PORT_MMAP_MAX_REGIONS = 1 << 20) at function entry. Symmetric with the (chunk_id, nchunks) helper this PR already introduces; the chunk_id helper covers the receive-path chunk arena, this cap covers the mmap-region index that flows through the same receive path one level up. 2. Response-side sptr validation in the router (src/nxt_router.c:4221) The router resolves response field sptrs (name / value) and the piggyback content sptr via nxt_unit_sptr_get() without bounds checking the embedded offset against the response buffer. A buggy or malicious worker can encode arbitrary offsets and have the router serialise bytes from outside the response buffer (within the same mmap region) onto the wire — a cross-request bleed inside the worker's own shmem area. This PR already added nxt_unit_sptr_in_buf() for the request-side sptrs on the libunit side. Move that helper from nxt_unit.c into nxt_unit_sptr.h so the router can use it too, then call it for each f->name / f->value / resp->piggyback_content sptr at the router's response-resolve site. No new helper: same shape, same underflow-safe pattern, same constant-side subtraction. The libunit write-side memset(0xA5) poison-fill site flagged alongside these by parallel review is intentionally not addressed here: it executes inside the worker process, with both source and length owned by the worker itself, so it does not cross a trust boundary. Filing as a hardening backlog note. --- src/nxt_port_memory.c | 13 ++++++++++++ src/nxt_port_memory_int.h | 8 +++++++ src/nxt_router.c | 32 ++++++++++++++++++++++++++++ src/nxt_unit.c | 44 --------------------------------------- src/nxt_unit_sptr.h | 44 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 44 deletions(-) diff --git a/src/nxt_port_memory.c b/src/nxt_port_memory.c index 3dbcf0039..1f8399509 100644 --- a/src/nxt_port_memory.c +++ b/src/nxt_port_memory.c @@ -48,6 +48,19 @@ nxt_port_mmap_at(nxt_port_mmaps_t *port_mmaps, uint32_t i) { uint32_t cap; + /* + * Cap the index against a generous ceiling before the growth + * arithmetic. When called from the receive path, i flows in from + * hdr->id of a peer-mapped region; a forged value of 0xFFFFFFFF + * would otherwise wrap (i + 1) to 0 in both the initial cap + * assignment and the loop condition, skip realloc, and return + * port_mmaps->elts + 0xFFFFFFFF — an out-of-array pointer the + * caller then writes through. + */ + if (nxt_slow_path(i >= PORT_MMAP_MAX_REGIONS)) { + return NULL; + } + cap = port_mmaps->cap; if (cap == 0) { diff --git a/src/nxt_port_memory_int.h b/src/nxt_port_memory_int.h index 21a05b104..64a89b912 100644 --- a/src/nxt_port_memory_int.h +++ b/src/nxt_port_memory_int.h @@ -30,6 +30,14 @@ #define PORT_MMAP_SIZE (PORT_MMAP_HEADER_SIZE + PORT_MMAP_DATA_SIZE) #define PORT_MMAP_CHUNK_COUNT (PORT_MMAP_DATA_SIZE / PORT_MMAP_CHUNK_SIZE) +/* + * Generous upper bound on the number of shared-memory regions tracked + * per peer. Used to cap peer-supplied mmap indices before the growth + * arithmetic in nxt_port_mmap_at(), so a forged hdr->id = 0xFFFFFFFF + * cannot wrap the growth loop and return an out-of-array pointer. + */ +#define PORT_MMAP_MAX_REGIONS (1u << 20) + typedef uint32_t nxt_chunk_id_t; diff --git a/src/nxt_router.c b/src/nxt_router.c index c994f3707..e9fe70b7d 100644 --- a/src/nxt_router.c +++ b/src/nxt_router.c @@ -4235,6 +4235,28 @@ nxt_router_response_ready_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg, field->name_length = f->name_length; field->value_length = f->value_length; + + /* + * The response is built inside an mmap region the app worker + * writes; sptr offsets are peer-supplied. Vet the offset + + * length against the response buffer before dereferencing, + * otherwise a buggy or malicious worker can encode arbitrary + * offsets and have the router serialise bytes from outside + * the response buffer (cross-request bleed within the same + * mmap region, or beyond) onto the wire. + */ + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&f->name, f->name_length, + b->mem.pos, b_size) + || !nxt_unit_sptr_in_buf(&f->value, f->value_length, + b->mem.pos, b_size))) + { + nxt_alert(task, "response field sptr out of buffer " + "(name_length=%d, value_length=%D)", + (int) f->name_length, f->value_length); + goto fail; + } + field->name = nxt_unit_sptr_get(&f->name); field->value = nxt_unit_sptr_get(&f->value); @@ -4256,6 +4278,16 @@ nxt_router_response_ready_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg, r->status = resp->status; if (resp->piggyback_content_length != 0) { + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&resp->piggyback_content, + resp->piggyback_content_length, + b->mem.pos, b_size))) + { + nxt_alert(task, "response piggyback sptr out of buffer " + "(length=%D)", resp->piggyback_content_length); + goto fail; + } + b->mem.pos = nxt_unit_sptr_get(&resp->piggyback_content); b->mem.free = b->mem.pos + resp->piggyback_content_length; diff --git a/src/nxt_unit.c b/src/nxt_unit.c index bfb187ef1..c2ce99592 100644 --- a/src/nxt_unit.c +++ b/src/nxt_unit.c @@ -240,50 +240,6 @@ nxt_unit_response_buf_size(uint32_t max_fields_count, } -/* - * Validate that an sptr field within a peer-supplied buffer dereferences - * to a [length]-byte range that is wholly inside the buffer. Used at - * request-arrival time to vet every sptr in nxt_unit_request_t before - * the application sees it. See security-audit.md V10. - * - * sptr->base aliases the address of the sptr itself (the union encodes - * an offset relative to that location), so this also implicitly checks - * that the sptr is inside the buffer. - */ -static int -nxt_unit_sptr_in_buf(nxt_unit_sptr_t *sptr, uint32_t length, - void *buf_start, uint32_t buf_size) -{ - size_t sptr_off, end_off; - - if ((uint8_t *) sptr < (uint8_t *) buf_start) { - return 0; - } - - sptr_off = (uint8_t *) sptr - (uint8_t *) buf_start; - /* - * Underflow-safe: subtract on the constant side throughout. The - * sptr struct itself must fit inside the buffer before we - * dereference sptr->offset, so check sizeof(*sptr) here rather - * than just sptr_off > buf_size. - */ - if (sptr_off > buf_size - sizeof(nxt_unit_sptr_t)) { - return 0; - } - - if (sptr->offset > buf_size - sptr_off) { - return 0; - } - - end_off = sptr_off + sptr->offset; - if (length > buf_size - end_off) { - return 0; - } - - return 1; -} - - struct nxt_unit_mmap_buf_s { nxt_unit_buf_t buf; diff --git a/src/nxt_unit_sptr.h b/src/nxt_unit_sptr.h index 314416e4e..37d89e70b 100644 --- a/src/nxt_unit_sptr.h +++ b/src/nxt_unit_sptr.h @@ -35,4 +35,48 @@ nxt_unit_sptr_get(nxt_unit_sptr_t *sptr) } +/* + * Validate that an sptr field within a peer-supplied buffer dereferences + * to a [length]-byte range that is wholly inside the buffer. Used to + * vet every peer-supplied sptr (request fields on the libunit side, + * response fields on the router side) before the receiver follows the + * embedded offset. + * + * sptr->base aliases the address of the sptr itself (the union encodes + * an offset relative to that location), so this also implicitly checks + * that the sptr is inside the buffer. + * + * Underflow-safe: subtracts on the constant side throughout. + */ +static inline int +nxt_unit_sptr_in_buf(nxt_unit_sptr_t *sptr, uint32_t length, + void *buf_start, uint32_t buf_size) +{ + size_t sptr_off, end_off; + + if ((uint8_t *) sptr < (uint8_t *) buf_start) { + return 0; + } + + sptr_off = (uint8_t *) sptr - (uint8_t *) buf_start; + + if (buf_size < sizeof(nxt_unit_sptr_t) + || sptr_off > buf_size - sizeof(nxt_unit_sptr_t)) + { + return 0; + } + + if (sptr->offset > buf_size - sptr_off) { + return 0; + } + + end_off = sptr_off + sptr->offset; + if (length > buf_size - end_off) { + return 0; + } + + return 1; +} + + #endif /* _NXT_UNIT_SPTR_H_INCLUDED_ */