diff --git a/CHANGES b/CHANGES index 8ae835854..416ce003e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,17 @@ Changes with FreeUnit 1.35.4 xx xxx 2026 + *) Security: bounds-check untrusted shared-memory offsets on the port + IPC and libunit deserialization paths (audit PR-B): reject + out-of-range chunk_id and chunk_id+nchunks in mmap messages + (nxt_port_mmap_get_incoming_buf), close a TOCTOU window between + incoming-mmap lookup and first hdr dereference by retaining the + handler refcount across the unlock, reject integer overflow in + the application-supplied (max_fields_count, max_fields_size) + response-buffer formula in nxt_unit_response_init/realloc, and + validate every nxt_unit_request_t sptr at request arrival before + any libunit consumer dereferences it. + *) Bugfix: fix router process CPU spin and connection hang under port scanning load; CLOSE-WAIT sockets are now cleaned up properly on client FIN, idle connection queue iteration fixed, systemd file diff --git a/src/nxt_port_memory.c b/src/nxt_port_memory.c index ad6e97ade..1f8399509 100644 --- a/src/nxt_port_memory.c +++ b/src/nxt_port_memory.c @@ -48,6 +48,19 @@ nxt_port_mmap_at(nxt_port_mmaps_t *port_mmaps, uint32_t i) { uint32_t cap; + /* + * Cap the index against a generous ceiling before the growth + * arithmetic. When called from the receive path, i flows in from + * hdr->id of a peer-mapped region; a forged value of 0xFFFFFFFF + * would otherwise wrap (i + 1) to 0 in both the initial cap + * assignment and the loop condition, skip realloc, and return + * port_mmaps->elts + 0xFFFFFFFF — an out-of-array pointer the + * caller then writes through. + */ + if (nxt_slow_path(i >= PORT_MMAP_MAX_REGIONS)) { + return NULL; + } + cap = port_mmaps->cap; if (cap == 0) { @@ -531,6 +544,16 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) if (nxt_fast_path(process->incoming.size > id)) { mmap_handler = process->incoming.elts[id].mmap_handler; + /* + * Bump refcount under the mutex so the handler cannot be unmapped + * by a concurrent peer-side close between this lookup and the + * caller's first dereference of mmap_handler->hdr. The caller + * adopts this reference and is responsible for releasing it. + */ + if (mmap_handler != NULL) { + nxt_port_mmap_handler_use(mmap_handler, 1); + } + } else { mmap_handler = NULL; @@ -543,6 +566,26 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) } +/* + * Validate that a peer-supplied (chunk_id, nchunks) pair refers to a + * region wholly inside the mapped data area. Returns non-zero on + * success. Underflow-safe: subtracts on the constant side. + */ +nxt_inline nxt_bool_t +nxt_port_mmap_chunk_range_valid(nxt_chunk_id_t chunk_id, size_t nchunks) +{ + if (chunk_id >= PORT_MMAP_CHUNK_COUNT) { + return 0; + } + + if (nchunks > (size_t) PORT_MMAP_CHUNK_COUNT - chunk_id) { + return 0; + } + + return 1; +} + + nxt_buf_t * nxt_port_mmap_get_buf(nxt_task_t *task, nxt_port_mmaps_t *mmaps, size_t size) { @@ -679,8 +722,31 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, return NULL; } + /* + * mmap_msg fields originate from a peer process; reject offsets + * that would point outside the mapped data area before they reach + * pointer arithmetic below. See security-audit.md V5. + */ + nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; + if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { + nchunks++; + } + + if (nxt_slow_path(!nxt_port_mmap_chunk_range_valid(mmap_msg->chunk_id, + nchunks))) + { + nxt_alert(task, "invalid mmap message from pid %PI: " + "chunk_id %uD, size %uD (chunks %uz, max %d)", + spid, mmap_msg->chunk_id, mmap_msg->size, + nchunks, PORT_MMAP_CHUNK_COUNT); + + nxt_port_mmap_handler_use(mmap_handler, -1); + return NULL; + } + b = nxt_buf_mem_ts_alloc(task, port->mem_pool, 0); if (nxt_slow_path(b == NULL)) { + nxt_port_mmap_handler_use(mmap_handler, -1); return NULL; } @@ -688,11 +754,6 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, nxt_buf_set_port_mmap(b); - nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; - if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { - nchunks++; - } - hdr = mmap_handler->hdr; b->mem.start = nxt_port_mmap_chunk_start(hdr, mmap_msg->chunk_id); @@ -701,7 +762,7 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, b->mem.end = b->mem.start + nchunks * PORT_MMAP_CHUNK_SIZE; b->parent = mmap_handler; - nxt_port_mmap_handler_use(mmap_handler, 1); + /* Adopts the reference taken by nxt_port_get_port_incoming_mmap(). */ nxt_debug(task, "incoming mmap buf allocation: %p [%p,%uz] %PI->%PI,%d,%d", b, b->mem.start, b->mem.end - b->mem.start, diff --git a/src/nxt_port_memory_int.h b/src/nxt_port_memory_int.h index 21a05b104..64a89b912 100644 --- a/src/nxt_port_memory_int.h +++ b/src/nxt_port_memory_int.h @@ -30,6 +30,14 @@ #define PORT_MMAP_SIZE (PORT_MMAP_HEADER_SIZE + PORT_MMAP_DATA_SIZE) #define PORT_MMAP_CHUNK_COUNT (PORT_MMAP_DATA_SIZE / PORT_MMAP_CHUNK_SIZE) +/* + * Generous upper bound on the number of shared-memory regions tracked + * per peer. Used to cap peer-supplied mmap indices before the growth + * arithmetic in nxt_port_mmap_at(), so a forged hdr->id = 0xFFFFFFFF + * cannot wrap the growth loop and return an out-of-array pointer. + */ +#define PORT_MMAP_MAX_REGIONS (1u << 20) + typedef uint32_t nxt_chunk_id_t; diff --git a/src/nxt_router.c b/src/nxt_router.c index c994f3707..e9fe70b7d 100644 --- a/src/nxt_router.c +++ b/src/nxt_router.c @@ -4235,6 +4235,28 @@ nxt_router_response_ready_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg, field->name_length = f->name_length; field->value_length = f->value_length; + + /* + * The response is built inside an mmap region the app worker + * writes; sptr offsets are peer-supplied. Vet the offset + + * length against the response buffer before dereferencing, + * otherwise a buggy or malicious worker can encode arbitrary + * offsets and have the router serialise bytes from outside + * the response buffer (cross-request bleed within the same + * mmap region, or beyond) onto the wire. + */ + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&f->name, f->name_length, + b->mem.pos, b_size) + || !nxt_unit_sptr_in_buf(&f->value, f->value_length, + b->mem.pos, b_size))) + { + nxt_alert(task, "response field sptr out of buffer " + "(name_length=%d, value_length=%D)", + (int) f->name_length, f->value_length); + goto fail; + } + field->name = nxt_unit_sptr_get(&f->name); field->value = nxt_unit_sptr_get(&f->value); @@ -4256,6 +4278,16 @@ nxt_router_response_ready_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg, r->status = resp->status; if (resp->piggyback_content_length != 0) { + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&resp->piggyback_content, + resp->piggyback_content_length, + b->mem.pos, b_size))) + { + nxt_alert(task, "response piggyback sptr out of buffer " + "(length=%D)", resp->piggyback_content_length); + goto fail; + } + b->mem.pos = nxt_unit_sptr_get(&resp->piggyback_content); b->mem.free = b->mem.pos + resp->piggyback_content_length; diff --git a/src/nxt_unit.c b/src/nxt_unit.c index 7d523beb8..c2ce99592 100644 --- a/src/nxt_unit.c +++ b/src/nxt_unit.c @@ -203,6 +203,43 @@ static void nxt_unit_lvlhsh_free(void *data, void *p); static int nxt_unit_memcasecmp(const void *p1, const void *p2, size_t length); +/* + * Compute the response buffer size for a given (max_fields_count, + * max_fields_size) pair, rejecting integer overflow. Both inputs come + * from the application; an overflow here would yield an undersized + * allocation that subsequent field memcpy()s overrun. See + * security-audit.md V10. + */ +static int +nxt_unit_response_buf_size(uint32_t max_fields_count, + uint32_t max_fields_size, uint32_t *buf_size) +{ + /* + * Each field name and value is 0-terminated by libunit, hence + * the '+ 2' per field (matches the historical formula). + */ + uint32_t total; + + if (max_fields_count + > (UINT32_MAX - (uint32_t) sizeof(nxt_unit_response_t)) + / (uint32_t) (sizeof(nxt_unit_field_t) + 2)) + { + return NXT_UNIT_ERROR; + } + + total = (uint32_t) sizeof(nxt_unit_response_t) + + max_fields_count * (uint32_t) (sizeof(nxt_unit_field_t) + 2); + + if (max_fields_size > UINT32_MAX - total) { + return NXT_UNIT_ERROR; + } + + *buf_size = total + max_fields_size; + + return NXT_UNIT_OK; +} + + struct nxt_unit_mmap_buf_s { nxt_unit_buf_t buf; @@ -1303,6 +1340,46 @@ nxt_unit_process_req_headers(nxt_unit_ctx_t *ctx, nxt_unit_recv_msg_t *recv_msg, return NXT_UNIT_ERROR; } + /* + * Validate every sptr in the request struct before any code path + * dereferences it. Offsets originate from the router (a more + * privileged peer) but the libunit ABI is also reachable from + * attacker-influenced input shapes; keeping the validation + * co-located with arrival makes the trust boundary explicit. + * See security-audit.md V10 (sptr offset). + */ + { + nxt_unit_request_t *vr = recv_msg->start; + uint32_t vsize = recv_msg->size; + + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&vr->method, vr->method_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->version, vr->version_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->remote, vr->remote_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_addr, vr->local_addr_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_port, vr->local_port_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->server_name, vr->server_name_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->target, vr->target_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->path, vr->path_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->query, vr->query_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->preread_content, 0, + recv_msg->start, vsize))) + { + nxt_unit_warn(ctx, "#%"PRIu32": malformed request: " + "sptr out of buffer", recv_msg->stream); + return NXT_UNIT_ERROR; + } + } + req_impl = nxt_unit_request_info_get(ctx); if (nxt_slow_path(req_impl == NULL)) { nxt_unit_warn(ctx, "#%"PRIu32": request info allocation failed", @@ -2042,13 +2119,15 @@ nxt_unit_response_init(nxt_unit_request_info_t *req, nxt_unit_req_debug(req, "duplicate response init"); } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "init: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } if (nxt_slow_path(req->response_buf != NULL)) { buf = req->response_buf; @@ -2121,13 +2200,15 @@ nxt_unit_response_realloc(nxt_unit_request_info_t *req, return NXT_UNIT_ERROR; } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "realloc: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } nxt_unit_req_debug(req, "realloc %"PRIu32"", buf_size); diff --git a/src/nxt_unit_sptr.h b/src/nxt_unit_sptr.h index 314416e4e..37d89e70b 100644 --- a/src/nxt_unit_sptr.h +++ b/src/nxt_unit_sptr.h @@ -35,4 +35,48 @@ nxt_unit_sptr_get(nxt_unit_sptr_t *sptr) } +/* + * Validate that an sptr field within a peer-supplied buffer dereferences + * to a [length]-byte range that is wholly inside the buffer. Used to + * vet every peer-supplied sptr (request fields on the libunit side, + * response fields on the router side) before the receiver follows the + * embedded offset. + * + * sptr->base aliases the address of the sptr itself (the union encodes + * an offset relative to that location), so this also implicitly checks + * that the sptr is inside the buffer. + * + * Underflow-safe: subtracts on the constant side throughout. + */ +static inline int +nxt_unit_sptr_in_buf(nxt_unit_sptr_t *sptr, uint32_t length, + void *buf_start, uint32_t buf_size) +{ + size_t sptr_off, end_off; + + if ((uint8_t *) sptr < (uint8_t *) buf_start) { + return 0; + } + + sptr_off = (uint8_t *) sptr - (uint8_t *) buf_start; + + if (buf_size < sizeof(nxt_unit_sptr_t) + || sptr_off > buf_size - sizeof(nxt_unit_sptr_t)) + { + return 0; + } + + if (sptr->offset > buf_size - sptr_off) { + return 0; + } + + end_off = sptr_off + sptr->offset; + if (length > buf_size - end_off) { + return 0; + } + + return 1; +} + + #endif /* _NXT_UNIT_SPTR_H_INCLUDED_ */