diff --git a/Cargo.lock b/Cargo.lock index ed2c0a81..f7e9a3de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4843,6 +4843,7 @@ dependencies = [ "log", "mime_guess", "nanoid", + "nix", "paddler_messaging", "paddler_openai_response_format_validator", "paddler_state_conversion", diff --git a/paddler_balancer/Cargo.toml b/paddler_balancer/Cargo.toml index 0be34360..264b493c 100644 --- a/paddler_balancer/Cargo.toml +++ b/paddler_balancer/Cargo.toml @@ -44,6 +44,9 @@ esbuild-metafile = { workspace = true, optional = true } mime_guess = { workspace = true, optional = true } rust-embed = { workspace = true, optional = true } +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["resource"] } + [features] default = [] web_admin_panel = [ diff --git a/paddler_balancer/src/balancer_http_server.rs b/paddler_balancer/src/balancer_http_server.rs new file mode 100644 index 00000000..6aeea788 --- /dev/null +++ b/paddler_balancer/src/balancer_http_server.rs @@ -0,0 +1,25 @@ +pub enum BalancerHttpServer { + Inference, + Management, + OpenAI, + WebAdminPanel, +} + +impl BalancerHttpServer { + /// Number of actix worker threads each balancer HTTP server runs. + /// + /// One worker multiplexes thousands of connections as async tasks (a websocket is a suspended + /// task, not a thread), so these counts size CPU parallelism, not connection capacity. They are + /// fixed per service load profile: inference and OpenAI-compat are client-facing request + /// processors that do inline JSON/SSE work; management carries mostly-idle agent control + /// sockets; the web admin panel serves static assets to a handful of human operators. Fixed + /// values keep startup file-descriptor usage predictable regardless of the host CPU count. + #[must_use] + pub const fn worker_count(&self) -> usize { + match self { + Self::Inference | Self::OpenAI => 16, + Self::Management => 4, + Self::WebAdminPanel => 2, + } + } +} diff --git a/paddler_balancer/src/compatibility/openai_service/mod.rs b/paddler_balancer/src/compatibility/openai_service/mod.rs index d26fefe1..6d15eb27 100644 --- a/paddler_balancer/src/compatibility/openai_service/mod.rs +++ b/paddler_balancer/src/compatibility/openai_service/mod.rs @@ -63,6 +63,7 @@ use tokio_util::sync::CancellationToken; use trzcina::Service; use trzcina::ServiceShutdownOptions; +use crate::balancer_http_server::BalancerHttpServer; use crate::buffered_request_manager::BufferedRequestManager; use crate::compatibility::openai_service::app_data::AppData; use crate::compatibility::openai_service::configuration::Configuration as OpenAIServiceConfiguration; @@ -106,6 +107,7 @@ impl Service for OpenAIService { .configure(http_route::post_chat_completions::register) .configure(http_route::post_responses::register) }) + .workers(BalancerHttpServer::OpenAI.worker_count()) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_balancer/src/ensure_file_descriptor_limit.rs b/paddler_balancer/src/ensure_file_descriptor_limit.rs new file mode 100644 index 00000000..c80365fb --- /dev/null +++ b/paddler_balancer/src/ensure_file_descriptor_limit.rs @@ -0,0 +1,129 @@ +use nix::sys::resource::Resource; +use nix::sys::resource::getrlimit; + +use crate::balancer_http_server::BalancerHttpServer; +use crate::file_descriptor_limit_error::FileDescriptorLimitError; + +/// File descriptors each actix worker's tokio runtime keeps open: measured at 4 on macOS (three +/// kqueue descriptors and one unix-domain socket per worker) with `lsof` against running balancers +/// configured at 20, 22, and 38 workers, where the per-worker slope is exact across all three. +/// Conservative on epoll platforms such as Linux, whose runtimes use fewer descriptors and whose +/// default `RLIMIT_NOFILE` is higher. +const FDS_PER_WORKER: u64 = 4; + +/// File descriptors each active HTTP server keeps open beyond its workers: measured at 3 on macOS +/// (one bound listener and two kqueue descriptors) by the same `lsof` fit. +const FDS_PER_SERVER: u64 = 3; + +/// File descriptors the balancer holds open independent of its HTTP servers: stdio, the main actix +/// `System` runtime, shutdown-signal handling, the binary image, and the working directory. +/// Measured at 12 on macOS by the same `lsof` fit. +const BASE_PROCESS_FDS: u64 = 12; + +fn required_file_descriptors(active_servers: &[BalancerHttpServer]) -> u64 { + let worker_descriptors: u64 = active_servers + .iter() + .map(|active_server| active_server.worker_count() as u64 * FDS_PER_WORKER) + .sum(); + let server_descriptors = active_servers.len() as u64 * FDS_PER_SERVER; + + worker_descriptors + server_descriptors + BASE_PROCESS_FDS +} + +const fn evaluate_file_descriptor_limit( + soft_limit: u64, + required: u64, +) -> Result<(), FileDescriptorLimitError> { + if soft_limit < required { + Err(FileDescriptorLimitError::InsufficientDescriptors { + soft_limit, + required, + }) + } else { + Ok(()) + } +} + +pub fn ensure_file_descriptor_limit( + active_servers: &[BalancerHttpServer], +) -> Result<(), FileDescriptorLimitError> { + let required = required_file_descriptors(active_servers); + let soft_limit = getrlimit(Resource::RLIMIT_NOFILE) + .map_err(|errno| FileDescriptorLimitError::UnableToReadLimit { + message: errno.to_string(), + })? + .0; + + evaluate_file_descriptor_limit(soft_limit, required) +} + +#[cfg(test)] +mod tests { + use super::BalancerHttpServer; + use super::FileDescriptorLimitError; + use super::ensure_file_descriptor_limit; + use super::evaluate_file_descriptor_limit; + use super::required_file_descriptors; + + fn all_servers() -> [BalancerHttpServer; 4] { + [ + BalancerHttpServer::Inference, + BalancerHttpServer::Management, + BalancerHttpServer::OpenAI, + BalancerHttpServer::WebAdminPanel, + ] + } + + #[test] + fn errors_when_soft_limit_is_below_requirement() { + let required = required_file_descriptors(&all_servers()); + + match evaluate_file_descriptor_limit(required - 1, required) { + Err(FileDescriptorLimitError::InsufficientDescriptors { + soft_limit, + required: reported_required, + }) => { + assert_eq!(soft_limit, required - 1); + assert_eq!(reported_required, required); + } + other => panic!("expected InsufficientDescriptors, got {other:?}"), + } + } + + #[test] + fn insufficient_descriptors_message_states_the_numbers_and_the_remedy() { + let message = FileDescriptorLimitError::InsufficientDescriptors { + soft_limit: 64, + required: 176, + } + .to_string(); + + assert!(message.contains("64")); + assert!(message.contains("176")); + assert!(message.contains("ulimit -n 176")); + } + + #[test] + fn succeeds_when_soft_limit_meets_requirement() { + let required = required_file_descriptors(&all_servers()); + + assert!(evaluate_file_descriptor_limit(required, required).is_ok()); + } + + #[test] + fn requirement_grows_with_each_active_server() { + let core_servers = [ + BalancerHttpServer::Inference, + BalancerHttpServer::Management, + ]; + + assert!( + required_file_descriptors(&all_servers()) > required_file_descriptors(&core_servers) + ); + } + + #[test] + fn ensure_succeeds_under_the_test_process_limit() { + assert!(ensure_file_descriptor_limit(&all_servers()).is_ok()); + } +} diff --git a/paddler_balancer/src/file_descriptor_limit_error.rs b/paddler_balancer/src/file_descriptor_limit_error.rs new file mode 100644 index 00000000..e7dc0448 --- /dev/null +++ b/paddler_balancer/src/file_descriptor_limit_error.rs @@ -0,0 +1,13 @@ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum FileDescriptorLimitError { + #[error( + "open file-descriptor limit is too low for the balancer: {soft_limit} available, at least \ + {required} required; raise it with `ulimit -n {required}` (or higher) and restart" + )] + InsufficientDescriptors { soft_limit: u64, required: u64 }, + + #[error("unable to read the open file-descriptor limit (RLIMIT_NOFILE): {message}")] + UnableToReadLimit { message: String }, +} diff --git a/paddler_balancer/src/inference_service/mod.rs b/paddler_balancer/src/inference_service/mod.rs index abaa2df5..13c1826a 100644 --- a/paddler_balancer/src/inference_service/mod.rs +++ b/paddler_balancer/src/inference_service/mod.rs @@ -16,6 +16,7 @@ use trzcina::ServiceShutdownOptions; use crate::agent_controller_pool::AgentControllerPool; use crate::balancer_applicable_state_holder::BalancerApplicableStateHolder; +use crate::balancer_http_server::BalancerHttpServer; use crate::buffered_request_manager::BufferedRequestManager; use crate::create_cors_middleware::create_cors_middleware; use crate::http_route as common_http_route; @@ -85,6 +86,7 @@ impl Service for InferenceService { .configure(http_route::api::post_generate_embedding_batch::register) .configure(http_route::api::ws_inference_socket::register) }) + .workers(BalancerHttpServer::Inference.worker_count()) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_balancer/src/lib.rs b/paddler_balancer/src/lib.rs index 3a4f0a03..2de989ef 100644 --- a/paddler_balancer/src/lib.rs +++ b/paddler_balancer/src/lib.rs @@ -6,6 +6,7 @@ pub mod agent_controller_update_result; pub mod balancer_applicable_state; pub mod balancer_applicable_state_holder; pub mod balancer_desired_state_converter; +pub mod balancer_http_server; mod buffered_request_agent_wait_result; mod buffered_request_count_guard; mod buffered_request_counter; @@ -23,6 +24,10 @@ pub mod create_cors_middleware; pub mod dispatch_candidate; pub mod dispatched_agent; pub mod embedding_sender_collection; +#[cfg(unix)] +pub mod ensure_file_descriptor_limit; +#[cfg(unix)] +pub mod file_descriptor_limit_error; pub mod generate_tokens_sender_collection; mod handles_agent_streaming_response; mod http_route; diff --git a/paddler_balancer/src/management_service/mod.rs b/paddler_balancer/src/management_service/mod.rs index 7316f005..ececc615 100644 --- a/paddler_balancer/src/management_service/mod.rs +++ b/paddler_balancer/src/management_service/mod.rs @@ -16,6 +16,7 @@ use trzcina::ServiceShutdownOptions; use crate::agent_controller_pool::AgentControllerPool; use crate::balancer_applicable_state_holder::BalancerApplicableStateHolder; +use crate::balancer_http_server::BalancerHttpServer; use crate::buffered_request_manager::BufferedRequestManager; use crate::chat_template_override_sender_collection::ChatTemplateOverrideSenderCollection; use crate::create_cors_middleware::create_cors_middleware; @@ -118,6 +119,7 @@ impl Service for ManagementService { .configure(http_route::api::ws_agent_socket::register) .configure(http_route::get_metrics::register) }) + .workers(BalancerHttpServer::Management.worker_count()) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_balancer/src/state_database/file/mod.rs b/paddler_balancer/src/state_database/file/mod.rs index 132d46a8..8b752f7e 100644 --- a/paddler_balancer/src/state_database/file/mod.rs +++ b/paddler_balancer/src/state_database/file/mod.rs @@ -281,6 +281,9 @@ mod tests { assert!(store_error.downcast_ref::().is_some()); } + // `/dev/full` always fails writes with `ENOSPC` (`StorageFull`) only on Linux; other platforms + // surface a different error kind, so this exact-kind assertion is Linux-specific. + #[cfg(target_os = "linux")] #[tokio::test] async fn storing_a_large_schema_surfaces_the_write_error_during_write_all() { const TOKIO_FILE_BUFFER_BYTES: usize = 2 * 1024 * 1024; diff --git a/paddler_balancer/src/web_admin_panel_service/mod.rs b/paddler_balancer/src/web_admin_panel_service/mod.rs index 25a9592d..21844090 100644 --- a/paddler_balancer/src/web_admin_panel_service/mod.rs +++ b/paddler_balancer/src/web_admin_panel_service/mod.rs @@ -13,6 +13,7 @@ use tokio_util::sync::CancellationToken; use trzcina::Service; use trzcina::ServiceShutdownOptions; +use crate::balancer_http_server::BalancerHttpServer; use crate::web_admin_panel_service::app_data::AppData; use crate::web_admin_panel_service::configuration::Configuration as WebAdminPanelServiceConfiguration; @@ -41,6 +42,7 @@ impl Service for WebAdminPanelService { .configure(http_route::static_files::register) .configure(http_route::home::register) }) + .workers(BalancerHttpServer::WebAdminPanel.worker_count()) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_bootstrap/src/balancer_service_bundle.rs b/paddler_bootstrap/src/balancer_service_bundle.rs index a7399a15..2ee4067c 100644 --- a/paddler_bootstrap/src/balancer_service_bundle.rs +++ b/paddler_bootstrap/src/balancer_service_bundle.rs @@ -5,11 +5,15 @@ use anyhow::Result; use async_trait::async_trait; use paddler_balancer::agent_controller_pool::AgentControllerPool; use paddler_balancer::balancer_applicable_state_holder::BalancerApplicableStateHolder; +#[cfg(unix)] +use paddler_balancer::balancer_http_server::BalancerHttpServer; use paddler_balancer::buffered_request_manager::BufferedRequestManager; use paddler_balancer::chat_template_override_sender_collection::ChatTemplateOverrideSenderCollection; use paddler_balancer::compatibility::openai_service::OpenAIService; use paddler_balancer::compatibility::openai_service::configuration::Configuration as OpenAIServiceConfiguration; use paddler_balancer::embedding_sender_collection::EmbeddingSenderCollection; +#[cfg(unix)] +use paddler_balancer::ensure_file_descriptor_limit::ensure_file_descriptor_limit; use paddler_balancer::generate_tokens_sender_collection::GenerateTokensSenderCollection; use paddler_balancer::inference_service::InferenceService; use paddler_balancer::inference_service::configuration::Configuration as InferenceServiceConfiguration; @@ -78,6 +82,25 @@ impl BalancerServiceBundle { web_admin_panel_service_configuration, }: BalancerBootstrapConfig, ) -> Result { + #[cfg(unix)] + { + let mut active_http_servers = vec![ + BalancerHttpServer::Inference, + BalancerHttpServer::Management, + ]; + + if openai_service_configuration.is_some() { + active_http_servers.push(BalancerHttpServer::OpenAI); + } + + #[cfg(feature = "web_admin_panel")] + if web_admin_panel_service_configuration.is_some() { + active_http_servers.push(BalancerHttpServer::WebAdminPanel); + } + + ensure_file_descriptor_limit(&active_http_servers)?; + } + let (balancer_desired_state_tx, balancer_desired_state_rx) = broadcast::channel(100); let agent_controller_pool = Arc::new(AgentControllerPool::default()); diff --git a/paddler_cli/tests/balancer_reports_readable_error_when_file_descriptor_limit_is_too_low.rs b/paddler_cli/tests/balancer_reports_readable_error_when_file_descriptor_limit_is_too_low.rs new file mode 100644 index 00000000..843eb5f6 --- /dev/null +++ b/paddler_cli/tests/balancer_reports_readable_error_when_file_descriptor_limit_is_too_low.rs @@ -0,0 +1,43 @@ +#![cfg(unix)] + +use std::process::Command; + +/// Recreates the original bug: under a low `RLIMIT_NOFILE`, starting the balancer used to spawn +/// dozens of actix worker runtimes, exhaust the descriptor table, and panic inside actix with an +/// opaque `RecvError`. The balancer must now refuse to start with a readable, actionable error +/// before any actix server is built. +#[test] +fn balancer_reports_readable_error_when_file_descriptor_limit_is_too_low() { + let paddler_binary = env!("CARGO_BIN_EXE_paddler"); + + // 64 is below the ~165 descriptors the three-server balancer requires, and well above the + // descriptors the process opens before the pre-flight check runs. The child shell lowers its + // own soft limit, then `exec`s the balancer, which inherits it. + let invocation = format!( + "ulimit -n 64; exec {paddler_binary} balancer \ + --inference-addr 127.0.0.1:0 --management-addr 127.0.0.1:0 --compat-openai-addr 127.0.0.1:0" + ); + + let output = Command::new("sh") + .arg("-c") + .arg(&invocation) + .output() + .expect("failed to spawn the paddler balancer subprocess"); + + let stderr = String::from_utf8_lossy(&output.stderr); + + assert!( + !output.status.success(), + "balancer should exit non-zero when the descriptor limit is too low; stderr was:\n{stderr}" + ); + assert!( + stderr.contains("open file-descriptor limit is too low") && stderr.contains("ulimit -n"), + "balancer should report a readable, actionable descriptor error; stderr was:\n{stderr}" + ); + assert!( + !stderr.contains("panicked") + && !stderr.contains("RecvError") + && !stderr.contains("Too many open files"), + "balancer must not surface the actix panic cascade; stderr was:\n{stderr}" + ); +}