diff --git a/.gitignore b/.gitignore index 32e5114..915379f 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,8 @@ Cargo.lock *.pdb .idea/ -run_example \ No newline at end of file +run_example + +# Temporary test binaries +verify_pooling +/tmp/*.rs \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index fb1dd08..9b8be44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,4 +17,6 @@ openai-rust2 = { version = "1.6.0" } async-trait = "0.1.88" log = "0.4.27" -env_logger = "0.11.8" \ No newline at end of file +env_logger = "0.11.8" +once_cell = "1.21" +reqwest = { version = "0.12", features = ["json", "stream"] } \ No newline at end of file diff --git a/changelog.txt b/changelog.txt index 4faa249..083b549 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,12 @@ +0.2.13 (Upcoming) + - Implemented HTTP client connection pooling for all providers: + - Created http_pool module that maintains singleton reqwest::Client instances per base URL + - Configured clients with persistent connections (90s idle timeout, 10 max idle per host) + - Added TCP keepalive (60s) to prevent connection drops + - Minimizes DNS lookups, TLS handshakes, and TCP connection overhead + - All clients (OpenAI, Gemini, Claude, Grok) automatically benefit from connection pooling + - Updated documentation to reflect connection pooling behavior + 0.2.12 SEP/21/2025 - Added Claude client implementation at src/cloudllm/clients/claude.rs: - ClaudeClient struct follows the same delegate pattern as GrokClient, using OpenAIClient internally diff --git a/src/cloudllm/clients/claude.rs b/src/cloudllm/clients/claude.rs index e08682a..62bffa7 100644 --- a/src/cloudllm/clients/claude.rs +++ b/src/cloudllm/clients/claude.rs @@ -1,15 +1,20 @@ use crate::client_wrapper::TokenUsage; -use crate::clients::claude::Model::ClaudeSonnet4; use crate::clients::openai::OpenAIClient; -use crate::{ClientWrapper, LLMSession, Message, Role}; +use crate::{ClientWrapper, Message}; use async_trait::async_trait; 
-use log::{error, info}; use openai_rust2 as openai_rust; -use openai_rust2::chat::SearchMode; -use std::env; use std::error::Error; use std::sync::Mutex; -use tokio::runtime::Runtime; + +#[cfg(test)] +use { + std::env, + tokio::runtime::Runtime, + crate::LLMSession, + crate::Role, + crate::clients::claude::Model::ClaudeSonnet4, + log::{error, info}, +}; pub struct ClaudeClient { delegate_client: OpenAIClient, diff --git a/src/cloudllm/clients/gemini.rs b/src/cloudllm/clients/gemini.rs index 74e92e4..75edf11 100644 --- a/src/cloudllm/clients/gemini.rs +++ b/src/cloudllm/clients/gemini.rs @@ -1,15 +1,20 @@ use crate::client_wrapper::TokenUsage; use crate::clients::common::send_and_track; -use crate::clients::openai::OpenAIClient; -use crate::{ClientWrapper, LLMSession, Message, Role}; +use crate::clients::http_pool::get_http_client; +use crate::{ClientWrapper, Message, Role}; use async_trait::async_trait; -use log::{error, info}; +use log::error; use openai_rust::chat; use openai_rust2 as openai_rust; -use std::env; -use std::error::Error; use std::sync::Mutex; -use tokio::runtime::Runtime; + +#[cfg(test)] +use { + std::env, + tokio::runtime::Runtime, + crate::LLMSession, + log::info, +}; pub struct GeminiClient { client: openai_rust::Client, @@ -148,10 +153,14 @@ pub fn model_to_string(model: Model) -> String { impl GeminiClient { pub fn new_with_model_string(secret_key: &str, model_name: &str) -> Self { + let base_url = "https://generativelanguage.googleapis.com/v1beta/"; + let http_client = get_http_client(base_url); + GeminiClient { - client: openai_rust::Client::new_with_base_url( + client: openai_rust::Client::new_with_client_and_base_url( secret_key, - "https://generativelanguage.googleapis.com/v1beta/", + http_client, + base_url, ), model: model_name.to_string(), token_usage: Mutex::new(None), @@ -165,8 +174,10 @@ impl GeminiClient { /// This function is used to create a GeminiClient with a custom base URL /// The default base URL is "" pub fn 
new_with_base_url(secret_key: &str, model_name: &str, base_url: &str) -> Self { + let http_client = get_http_client(base_url); + GeminiClient { - client: openai_rust::Client::new_with_base_url(secret_key, base_url), + client: openai_rust::Client::new_with_client_and_base_url(secret_key, http_client, base_url), model: model_name.to_string(), token_usage: Mutex::new(None), } diff --git a/src/cloudllm/clients/grok.rs b/src/cloudllm/clients/grok.rs index 9af32f2..ddd92ab 100644 --- a/src/cloudllm/clients/grok.rs +++ b/src/cloudllm/clients/grok.rs @@ -1,15 +1,21 @@ use crate::client_wrapper::TokenUsage; -use crate::clients::grok::Model::Grok4_0709; use crate::clients::openai::OpenAIClient; -use crate::{ClientWrapper, LLMSession, Message, Role}; +use crate::{ClientWrapper, Message}; use async_trait::async_trait; -use log::{error, info}; use openai_rust2 as openai_rust; -use openai_rust2::chat::SearchMode; -use std::env; use std::error::Error; use std::sync::Mutex; -use tokio::runtime::Runtime; + +#[cfg(test)] +use { + std::env, + tokio::runtime::Runtime, + crate::LLMSession, + crate::Role, + crate::clients::grok::Model::Grok4_0709, + openai_rust2::chat::SearchMode, + log::{error, info}, +}; pub struct GrokClient { delegate_client: OpenAIClient, diff --git a/src/cloudllm/clients/http_pool.rs b/src/cloudllm/clients/http_pool.rs new file mode 100644 index 0000000..4eece09 --- /dev/null +++ b/src/cloudllm/clients/http_pool.rs @@ -0,0 +1,57 @@ +//! HTTP Client Pool for maintaining persistent connections per base URL. +//! +//! This module provides a singleton pool of reqwest::Client instances, one per base URL. +//! This ensures that: +//! - HTTP connections are reused across multiple requests (connection pooling) +//! - DNS lookups are minimized +//! - TLS handshakes are reused where possible +//! - TCP connections are kept alive to avoid reconnection overhead +//! +//! The reqwest::Client is configured with optimal settings for persistent connections: +//! 
- `pool_idle_timeout`: Keeps idle connections alive for 90 seconds +//! - `pool_max_idle_per_host`: Allows up to 10 idle connections per host +//! - `tcp_keepalive`: Sends keepalive packets every 60 seconds to prevent connection closure + +use once_cell::sync::Lazy; +use reqwest; +use std::collections::HashMap; +use std::sync::Mutex; +use std::time::Duration; + +/// Global HTTP client pool, lazily initialized on first access. +static HTTP_CLIENT_POOL: Lazy<Mutex<HashMap<String, reqwest::Client>>> = + Lazy::new(|| Mutex::new(HashMap::new())); + +/// Get or create a shared HTTP client for the given base URL. +/// +/// This function maintains a singleton pool of reqwest::Client instances. +/// Each base URL gets its own client to ensure proper connection pooling. +/// +/// # Arguments +/// * `base_url` - The base URL for which to get/create an HTTP client +/// +/// # Returns +/// A cloned reqwest::Client configured for persistent connections +pub fn get_http_client(base_url: &str) -> reqwest::Client { + let mut pool = HTTP_CLIENT_POOL.lock().unwrap(); + + if let Some(client) = pool.get(base_url) { + return client.clone(); + } + + // Create a new client with optimal settings for persistent connections + let client = reqwest::ClientBuilder::new() + // Keep idle connections alive for 90 seconds + .pool_idle_timeout(Some(Duration::from_secs(90))) + // Allow up to 10 idle connections per host for better throughput + .pool_max_idle_per_host(10) + // Enable TCP keepalive to prevent connection drops + .tcp_keepalive(Some(Duration::from_secs(60))) + // Set a reasonable timeout for the entire request + .timeout(Duration::from_secs(300)) + .build() + .expect("Failed to build HTTP client"); + + pool.insert(base_url.to_string(), client.clone()); + client +} diff --git a/src/cloudllm/clients/mod.rs b/src/cloudllm/clients/mod.rs index 0dd7971..26539f9 100644 --- a/src/cloudllm/clients/mod.rs +++ b/src/cloudllm/clients/mod.rs @@ -1,5 +1,6 @@ // src/clients/mod.rs pub mod common; +pub mod http_pool; pub mod claude; pub mod 
gemini; diff --git a/src/cloudllm/clients/openai.rs b/src/cloudllm/clients/openai.rs index 6b70f7e..1c61a2f 100644 --- a/src/cloudllm/clients/openai.rs +++ b/src/cloudllm/clients/openai.rs @@ -7,6 +7,8 @@ //! - **send_message(...)**: unchanged signature; returns a `Message` as before. //! - **Automatic Usage Capture**: stores the latest `TokenUsage` (input_tokens, output_tokens, total_tokens) internally. //! - **Inspect Usage**: call `get_last_usage()` after `send_message()` to retrieve actual usage stats. +//! - **Connection Pooling**: Automatically uses persistent HTTP connections. Multiple `OpenAIClient` instances +//! with the same base URL share a connection pool, minimizing DNS lookups, TLS handshakes, and TCP overhead. //! //! # Example //! @@ -41,21 +43,27 @@ //! //! Make sure `OPENAI_API_KEY` is set and pick a valid model name (e.g. `"gpt-4.1-nano"`). -use std::env; use std::error::Error; use async_trait::async_trait; -use log::{error, info}; +use log::error; use openai_rust::chat; use openai_rust2 as openai_rust; use crate::client_wrapper::TokenUsage; use crate::clients::common::send_and_track; +use crate::clients::http_pool::get_http_client; use crate::cloudllm::client_wrapper::{ClientWrapper, Message, Role}; use std::sync::Mutex; -use tokio::runtime::Runtime; -use crate::clients::openai::Model::GPT5Nano; -use crate::LLMSession; + +#[cfg(test)] +use { + std::env, + tokio::runtime::Runtime, + crate::LLMSession, + crate::clients::openai::Model::GPT5Nano, + log::info, +}; pub enum Model { GPT5, // Higher Reasoning, Medium speed, Text+Image input, Text output; input $1.25/1M tokens, cached input $0.125/1M tokens, output $10/1M tokens @@ -119,16 +127,22 @@ impl OpenAIClient { } pub fn new_with_model_string(secret_key: &str, model_name: &str) -> Self { + // Use default OpenAI base URL + let base_url = "https://api.openai.com"; + let http_client = get_http_client(base_url); + OpenAIClient { - client: openai_rust::Client::new(secret_key), + client: 
openai_rust::Client::new_with_client(secret_key, http_client), model: model_name.to_string(), token_usage: Mutex::new(None), } } pub fn new_with_base_url(secret_key: &str, model_name: &str, base_url: &str) -> Self { + let http_client = get_http_client(base_url); + OpenAIClient { - client: openai_rust::Client::new_with_base_url(secret_key, base_url), + client: openai_rust::Client::new_with_client_and_base_url(secret_key, http_client, base_url), model: model_name.to_string(), token_usage: Mutex::new(None), } diff --git a/src/lib.rs b/src/lib.rs index 159432a..28eda2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,14 @@ //! via client wrappers. For example, `OpenAIClient` serves as a client for OpenAI's ChatGPT, abstracting the interaction //! specifics and presenting a unified interface. //! +//! - **Connection Pooling**: All HTTP clients automatically use persistent connection pooling to minimize latency. +//! Each base URL (e.g., api.openai.com, api.anthropic.com) maintains its own connection pool with: +//! - Reused HTTP connections to avoid TCP handshake overhead +//! - Minimized DNS lookups through connection reuse +//! - Persistent TLS sessions to skip expensive handshakes +//! - TCP keepalive to prevent connection timeouts +//! This design ensures optimal performance in co-located and distributed deployments. +//! //! ## The Road Ahead: LLM-VM Architecture //! //! The library is poised to evolve into a more sophisticated toolset with the introduction of the "LLM-VM" architecture.