diff --git a/paddler_balancer/src/compatibility/openai_service/mod.rs b/paddler_balancer/src/compatibility/openai_service/mod.rs index d26fefe1..30fb0c7e 100644 --- a/paddler_balancer/src/compatibility/openai_service/mod.rs +++ b/paddler_balancer/src/compatibility/openai_service/mod.rs @@ -70,6 +70,8 @@ use crate::create_cors_middleware::create_cors_middleware; use crate::http_route as common_http_route; use crate::inference_service::configuration::Configuration as InferenceServiceConfiguration; +const HTTP_WORKERS: usize = 16; + pub struct OpenAIService { pub buffered_request_manager: Arc, pub inference_service_configuration: InferenceServiceConfiguration, @@ -106,6 +108,7 @@ impl Service for OpenAIService { .configure(http_route::post_chat_completions::register) .configure(http_route::post_responses::register) }) + .workers(HTTP_WORKERS) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_balancer/src/inference_service/mod.rs b/paddler_balancer/src/inference_service/mod.rs index abaa2df5..f96f5a70 100644 --- a/paddler_balancer/src/inference_service/mod.rs +++ b/paddler_balancer/src/inference_service/mod.rs @@ -24,6 +24,8 @@ use crate::inference_service::configuration::Configuration as InferenceServiceCo #[cfg(feature = "web_admin_panel")] use crate::web_admin_panel_service::configuration::Configuration as WebAdminPanelServiceConfiguration; +const HTTP_WORKERS: usize = 16; + pub struct InferenceService { pub agent_controller_pool: Arc, pub balancer_applicable_state_holder: Arc, @@ -85,6 +87,7 @@ impl Service for InferenceService { .configure(http_route::api::post_generate_embedding_batch::register) .configure(http_route::api::ws_inference_socket::register) }) + .workers(HTTP_WORKERS) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_balancer/src/management_service/mod.rs b/paddler_balancer/src/management_service/mod.rs index 7316f005..a89731db 100644 --- a/paddler_balancer/src/management_service/mod.rs +++ b/paddler_balancer/src/management_service/mod.rs @@ -39,6 +39,8 @@ fn collect_web_admin_panel_cors_allowed_hosts( .collect() } +const HTTP_WORKERS: usize = 2; + pub struct ManagementService { pub agent_controller_pool: Arc, pub balancer_applicable_state_holder: Arc, @@ -118,6 +120,7 @@ impl Service for ManagementService { .configure(http_route::api::ws_agent_socket::register) .configure(http_route::get_metrics::register) }) + .workers(HTTP_WORKERS) .shutdown_signal(async move { shutdown.cancelled().await; }) diff --git a/paddler_balancer/src/web_admin_panel_service/mod.rs b/paddler_balancer/src/web_admin_panel_service/mod.rs index 25a9592d..6c9915d1 100644 --- a/paddler_balancer/src/web_admin_panel_service/mod.rs +++ b/paddler_balancer/src/web_admin_panel_service/mod.rs @@ -16,6 +16,8 @@ use trzcina::ServiceShutdownOptions; use crate::web_admin_panel_service::app_data::AppData; use crate::web_admin_panel_service::configuration::Configuration as WebAdminPanelServiceConfiguration; +const HTTP_WORKERS: usize = 2; + pub struct WebAdminPanelService { pub configuration: WebAdminPanelServiceConfiguration, pub shutdown_options: ServiceShutdownOptions, @@ -41,6 +43,7 @@ impl Service for WebAdminPanelService { .configure(http_route::static_files::register) .configure(http_route::home::register) }) + .workers(HTTP_WORKERS) .shutdown_signal(async move { shutdown.cancelled().await; })