intentee · malzag · Jun 5, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/paddler_balancer/Cargo.toml b/paddler_balancer/Cargo.toml
@@ -44,6 +44,9 @@ esbuild-metafile = { workspace = true, optional = true }
 mime_guess = { workspace = true, optional = true }
 rust-embed = { workspace = true, optional = true }
 
+[target.'cfg(unix)'.dependencies]
+nix = { workspace = true, features = ["resource"] }
+
 [features]
 default = []
 web_admin_panel = [

diff --git a/paddler_balancer/src/balancer_http_server.rs b/paddler_balancer/src/balancer_http_server.rs
@@ -0,0 +1,25 @@
+pub enum BalancerHttpServer {
+    Inference,
+    Management,
+    OpenAI,
+    WebAdminPanel,
+}
+
+impl BalancerHttpServer {
+    /// Number of actix worker threads each balancer HTTP server runs.
+    ///
+    /// One worker multiplexes thousands of connections as async tasks (a websocket is a suspended
+    /// task, not a thread), so these counts size CPU parallelism, not connection capacity. They are
+    /// fixed per service load profile: inference and OpenAI-compat are client-facing request
+    /// processors that do inline JSON/SSE work; management carries mostly-idle agent control
+    /// sockets; the web admin panel serves static assets to a handful of human operators. Fixed
+    /// values keep startup file-descriptor usage predictable regardless of the host CPU count.
+    #[must_use]
+    pub const fn worker_count(&self) -> usize {
+        match self {
+            Self::Inference | Self::OpenAI => 16,
+            Self::Management => 4,
+            Self::WebAdminPanel => 2,
+        }
+    }
+}
diff --git a/paddler_balancer/src/compatibility/openai_service/mod.rs b/paddler_balancer/src/compatibility/openai_service/mod.rs
@@ -63,6 +63,7 @@ use tokio_util::sync::CancellationToken;
 use trzcina::Service;
 use trzcina::ServiceShutdownOptions;
 
+use crate::balancer_http_server::BalancerHttpServer;
 use crate::buffered_request_manager::BufferedRequestManager;
 use crate::compatibility::openai_service::app_data::AppData;
 use crate::compatibility::openai_service::configuration::Configuration as OpenAIServiceConfiguration;
@@ -106,6 +107,7 @@ impl Service for OpenAIService {
                 .configure(http_route::post_chat_completions::register)
                 .configure(http_route::post_responses::register)
         })
+        .workers(BalancerHttpServer::OpenAI.worker_count())
         .shutdown_signal(async move {
             shutdown.cancelled().await;
         })

diff --git a/paddler_balancer/src/ensure_file_descriptor_limit.rs b/paddler_balancer/src/ensure_file_descriptor_limit.rs
@@ -0,0 +1,129 @@
+use nix::sys::resource::Resource;
+use nix::sys::resource::getrlimit;
+
+use crate::balancer_http_server::BalancerHttpServer;
+use crate::file_descriptor_limit_error::FileDescriptorLimitError;
+
+/// File descriptors each actix worker's tokio runtime keeps open: measured at 4 on macOS (three
+/// kqueue descriptors and one unix-domain socket per worker) with `lsof` against running balancers
+/// configured at 20, 22, and 38 workers, where the per-worker slope is exact across all three.
+/// Conservative on epoll platforms such as Linux, whose runtimes use fewer descriptors and whose
+/// default `RLIMIT_NOFILE` is higher.
+const FDS_PER_WORKER: u64 = 4;
+
+/// File descriptors each active HTTP server keeps open beyond its workers: measured at 3 on macOS
+/// (one bound listener and two kqueue descriptors) by the same `lsof` fit.
+const FDS_PER_SERVER: u64 = 3;
+
+/// File descriptors the balancer holds open independent of its HTTP servers: stdio, the main actix
+/// `System` runtime, shutdown-signal handling, the binary image, and the working directory.
+/// Measured at 12 on macOS by the same `lsof` fit.
+const BASE_PROCESS_FDS: u64 = 12;
+
+fn required_file_descriptors(active_servers: &[BalancerHttpServer]) -> u64 {
+    let worker_descriptors: u64 = active_servers
+        .iter()
+        .map(|active_server| active_server.worker_count() as u64 * FDS_PER_WORKER)
+        .sum();
+    let server_descriptors = active_servers.len() as u64 * FDS_PER_SERVER;
+
+    worker_descriptors + server_descriptors + BASE_PROCESS_FDS
+}
+
+const fn evaluate_file_descriptor_limit(
+    soft_limit: u64,
+    required: u64,
+) -> Result<(), FileDescriptorLimitError> {
+    if soft_limit < required {
+        Err(FileDescriptorLimitError::InsufficientDescriptors {
+            soft_limit,
+            required,
+        })
+    } else {
+        Ok(())
+    }
+}
+
+pub fn ensure_file_descriptor_limit(
+    active_servers: &[BalancerHttpServer],
+) -> Result<(), FileDescriptorLimitError> {
+    let required = required_file_descriptors(active_servers);
+    let soft_limit = getrlimit(Resource::RLIMIT_NOFILE)
+        .map_err(|errno| FileDescriptorLimitError::UnableToReadLimit {
+            message: errno.to_string(),
+        })?
+        .0;
+
+    evaluate_file_descriptor_limit(soft_limit, required)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::BalancerHttpServer;
+    use super::FileDescriptorLimitError;
+    use super::ensure_file_descriptor_limit;
+    use super::evaluate_file_descriptor_limit;
+    use super::required_file_descriptors;
+
+    fn all_servers() -> [BalancerHttpServer; 4] {
+        [
+            BalancerHttpServer::Inference,
+            BalancerHttpServer::Management,
+            BalancerHttpServer::OpenAI,
+            BalancerHttpServer::WebAdminPanel,
+        ]
+    }
+
+    #[test]
+    fn errors_when_soft_limit_is_below_requirement() {
+        let required = required_file_descriptors(&all_servers());
+
+        match evaluate_file_descriptor_limit(required - 1, required) {
+            Err(FileDescriptorLimitError::InsufficientDescriptors {
+                soft_limit,
+                required: reported_required,
+            }) => {
+                assert_eq!(soft_limit, required - 1);
+                assert_eq!(reported_required, required);
+            }
+            other => panic!("expected InsufficientDescriptors, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn insufficient_descriptors_message_states_the_numbers_and_the_remedy() {
+        let message = FileDescriptorLimitError::InsufficientDescriptors {
+            soft_limit: 64,
+            required: 176,
+        }
+        .to_string();
+
+        assert!(message.contains("64"));
+        assert!(message.contains("176"));
+        assert!(message.contains("ulimit -n 176"));
+    }
+
+    #[test]
+    fn succeeds_when_soft_limit_meets_requirement() {
+        let required = required_file_descriptors(&all_servers());
+
+        assert!(evaluate_file_descriptor_limit(required, required).is_ok());
+    }
+
+    #[test]
+    fn requirement_grows_with_each_active_server() {
+        let core_servers = [
+            BalancerHttpServer::Inference,
+            BalancerHttpServer::Management,
+        ];
+
+        assert!(
+            required_file_descriptors(&all_servers()) > required_file_descriptors(&core_servers)
+        );
+    }
+
+    #[test]
+    fn ensure_succeeds_under_the_test_process_limit() {
+        assert!(ensure_file_descriptor_limit(&all_servers()).is_ok());
+    }
+}
diff --git a/paddler_balancer/src/file_descriptor_limit_error.rs b/paddler_balancer/src/file_descriptor_limit_error.rs
@@ -0,0 +1,13 @@
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum FileDescriptorLimitError {
+    #[error(
+        "open file-descriptor limit is too low for the balancer: {soft_limit} available, at least \
+         {required} required; raise it with `ulimit -n {required}` (or higher) and restart"
+    )]
+    InsufficientDescriptors { soft_limit: u64, required: u64 },
+
+    #[error("unable to read the open file-descriptor limit (RLIMIT_NOFILE): {message}")]
+    UnableToReadLimit { message: String },
+}
diff --git a/paddler_balancer/src/inference_service/mod.rs b/paddler_balancer/src/inference_service/mod.rs
@@ -16,6 +16,7 @@ use trzcina::ServiceShutdownOptions;
 
 use crate::agent_controller_pool::AgentControllerPool;
 use crate::balancer_applicable_state_holder::BalancerApplicableStateHolder;
+use crate::balancer_http_server::BalancerHttpServer;
 use crate::buffered_request_manager::BufferedRequestManager;
 use crate::create_cors_middleware::create_cors_middleware;
 use crate::http_route as common_http_route;
@@ -85,6 +86,7 @@ impl Service for InferenceService {
                 .configure(http_route::api::post_generate_embedding_batch::register)
                 .configure(http_route::api::ws_inference_socket::register)
         })
+        .workers(BalancerHttpServer::Inference.worker_count())
         .shutdown_signal(async move {
             shutdown.cancelled().await;
         })

diff --git a/paddler_balancer/src/lib.rs b/paddler_balancer/src/lib.rs
@@ -6,6 +6,7 @@ pub mod agent_controller_update_result;
 pub mod balancer_applicable_state;
 pub mod balancer_applicable_state_holder;
 pub mod balancer_desired_state_converter;
+pub mod balancer_http_server;
 mod buffered_request_agent_wait_result;
 mod buffered_request_count_guard;
 mod buffered_request_counter;
@@ -23,6 +24,10 @@ pub mod create_cors_middleware;
 pub mod dispatch_candidate;
 pub mod dispatched_agent;
 pub mod embedding_sender_collection;
+#[cfg(unix)]
+pub mod ensure_file_descriptor_limit;
+#[cfg(unix)]
+pub mod file_descriptor_limit_error;
 pub mod generate_tokens_sender_collection;
 mod handles_agent_streaming_response;
 mod http_route;

diff --git a/paddler_balancer/src/management_service/mod.rs b/paddler_balancer/src/management_service/mod.rs
@@ -16,6 +16,7 @@ use trzcina::ServiceShutdownOptions;
 
 use crate::agent_controller_pool::AgentControllerPool;
 use crate::balancer_applicable_state_holder::BalancerApplicableStateHolder;
+use crate::balancer_http_server::BalancerHttpServer;
 use crate::buffered_request_manager::BufferedRequestManager;
 use crate::chat_template_override_sender_collection::ChatTemplateOverrideSenderCollection;
 use crate::create_cors_middleware::create_cors_middleware;
@@ -118,6 +119,7 @@ impl Service for ManagementService {
                 .configure(http_route::api::ws_agent_socket::register)
                 .configure(http_route::get_metrics::register)
         })
+        .workers(BalancerHttpServer::Management.worker_count())
         .shutdown_signal(async move {
             shutdown.cancelled().await;
         })

diff --git a/paddler_balancer/src/state_database/file/mod.rs b/paddler_balancer/src/state_database/file/mod.rs
@@ -281,6 +281,9 @@ mod tests {
         assert!(store_error.downcast_ref::<std::io::Error>().is_some());
     }
 
+    // `/dev/full` always fails writes with `ENOSPC` (`StorageFull`) only on Linux; other platforms
+    // surface a different error kind, so this exact-kind assertion is Linux-specific.
+    #[cfg(target_os = "linux")]
     #[tokio::test]
     async fn storing_a_large_schema_surfaces_the_write_error_during_write_all() {
         const TOKIO_FILE_BUFFER_BYTES: usize = 2 * 1024 * 1024;

diff --git a/paddler_balancer/src/web_admin_panel_service/mod.rs b/paddler_balancer/src/web_admin_panel_service/mod.rs
@@ -13,6 +13,7 @@ use tokio_util::sync::CancellationToken;
 use trzcina::Service;
 use trzcina::ServiceShutdownOptions;
 
+use crate::balancer_http_server::BalancerHttpServer;
 use crate::web_admin_panel_service::app_data::AppData;
 use crate::web_admin_panel_service::configuration::Configuration as WebAdminPanelServiceConfiguration;
 
@@ -41,6 +42,7 @@ impl Service for WebAdminPanelService {
                 .configure(http_route::static_files::register)
                 .configure(http_route::home::register)
         })
+        .workers(BalancerHttpServer::WebAdminPanel.worker_count())
         .shutdown_signal(async move {
             shutdown.cancelled().await;
         })

diff --git a/paddler_bootstrap/src/balancer_service_bundle.rs b/paddler_bootstrap/src/balancer_service_bundle.rs
@@ -5,11 +5,15 @@ use anyhow::Result;
 use async_trait::async_trait;
 use paddler_balancer::agent_controller_pool::AgentControllerPool;
 use paddler_balancer::balancer_applicable_state_holder::BalancerApplicableStateHolder;
+#[cfg(unix)]
+use paddler_balancer::balancer_http_server::BalancerHttpServer;
 use paddler_balancer::buffered_request_manager::BufferedRequestManager;
 use paddler_balancer::chat_template_override_sender_collection::ChatTemplateOverrideSenderCollection;
 use paddler_balancer::compatibility::openai_service::OpenAIService;
 use paddler_balancer::compatibility::openai_service::configuration::Configuration as OpenAIServiceConfiguration;
 use paddler_balancer::embedding_sender_collection::EmbeddingSenderCollection;
+#[cfg(unix)]
+use paddler_balancer::ensure_file_descriptor_limit::ensure_file_descriptor_limit;
 use paddler_balancer::generate_tokens_sender_collection::GenerateTokensSenderCollection;
 use paddler_balancer::inference_service::InferenceService;
 use paddler_balancer::inference_service::configuration::Configuration as InferenceServiceConfiguration;
@@ -78,6 +82,25 @@ impl BalancerServiceBundle {
             web_admin_panel_service_configuration,
         }: BalancerBootstrapConfig,
     ) -> Result<Self> {
+        #[cfg(unix)]
+        {
+            let mut active_http_servers = vec![
+                BalancerHttpServer::Inference,
+                BalancerHttpServer::Management,
+            ];
+
+            if openai_service_configuration.is_some() {
+                active_http_servers.push(BalancerHttpServer::OpenAI);
+            }
+
+            #[cfg(feature = "web_admin_panel")]
+            if web_admin_panel_service_configuration.is_some() {
+                active_http_servers.push(BalancerHttpServer::WebAdminPanel);
+            }
+
+            ensure_file_descriptor_limit(&active_http_servers)?;
+        }
+
         let (balancer_desired_state_tx, balancer_desired_state_rx) = broadcast::channel(100);
 
         let agent_controller_pool = Arc::new(AgentControllerPool::default());

diff --git a/paddler_cli/tests/balancer_reports_readable_error_when_file_descriptor_limit_is_too_low.rs b/paddler_cli/tests/balancer_reports_readable_error_when_file_descriptor_limit_is_too_low.rs
@@ -0,0 +1,43 @@
+#![cfg(unix)]
+
+use std::process::Command;
+
+/// Recreates the original bug: under a low `RLIMIT_NOFILE`, starting the balancer used to spawn
+/// dozens of actix worker runtimes, exhaust the descriptor table, and panic inside actix with an
+/// opaque `RecvError`. The balancer must now refuse to start with a readable, actionable error
+/// before any actix server is built.
+#[test]
+fn balancer_reports_readable_error_when_file_descriptor_limit_is_too_low() {
+    let paddler_binary = env!("CARGO_BIN_EXE_paddler");
+
+    // 64 is below the ~165 descriptors the three-server balancer requires, and well above the
+    // descriptors the process opens before the pre-flight check runs. The child shell lowers its
+    // own soft limit, then `exec`s the balancer, which inherits it.
+    let invocation = format!(
+        "ulimit -n 64; exec {paddler_binary} balancer \
+         --inference-addr 127.0.0.1:0 --management-addr 127.0.0.1:0 --compat-openai-addr 127.0.0.1:0"
+    );
+
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(&invocation)
+        .output()
+        .expect("failed to spawn the paddler balancer subprocess");
+
+    let stderr = String::from_utf8_lossy(&output.stderr);
+
+    assert!(
+        !output.status.success(),
+        "balancer should exit non-zero when the descriptor limit is too low; stderr was:\n{stderr}"
+    );
+    assert!(
+        stderr.contains("open file-descriptor limit is too low") && stderr.contains("ulimit -n"),
+        "balancer should report a readable, actionable descriptor error; stderr was:\n{stderr}"
+    );
+    assert!(
+        !stderr.contains("panicked")
+            && !stderr.contains("RecvError")
+            && !stderr.contains("Too many open files"),
+        "balancer must not surface the actix panic cascade; stderr was:\n{stderr}"
+    );
+}