diff --git a/.gitignore b/.gitignore
index 32e5114..915379f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,8 @@ Cargo.lock
*.pdb
.idea/
-run_example
\ No newline at end of file
+run_example
+
+# Temporary test binaries
+verify_pooling
+/tmp/*.rs
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index fb1dd08..9b8be44 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,4 +17,6 @@ openai-rust2 = { version = "1.6.0" }
async-trait = "0.1.88"
log = "0.4.27"
-env_logger = "0.11.8"
\ No newline at end of file
+env_logger = "0.11.8"
+once_cell = "1.21"
+reqwest = { version = "0.12", features = ["json", "stream"] }
\ No newline at end of file
diff --git a/changelog.txt b/changelog.txt
index 4faa249..083b549 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,12 @@
+0.2.13 (Upcoming)
+ - Implemented HTTP client connection pooling for all providers:
+ - Created http_pool module that maintains singleton reqwest::Client instances per base URL
+ - Configured clients with persistent connections (90s idle timeout, 10 max idle per host)
+ - Added TCP keepalive (60s) to prevent connection drops
+ - Minimizes DNS lookups, TLS handshakes, and TCP connection overhead
+ - All clients (OpenAI, Gemini, Claude, Grok) automatically benefit from connection pooling
+ - Updated documentation to reflect connection pooling behavior
+
0.2.12 SEP/21/2025
- Added Claude client implementation at src/cloudllm/clients/claude.rs:
- ClaudeClient struct follows the same delegate pattern as GrokClient, using OpenAIClient internally
diff --git a/src/cloudllm/clients/claude.rs b/src/cloudllm/clients/claude.rs
index e08682a..62bffa7 100644
--- a/src/cloudllm/clients/claude.rs
+++ b/src/cloudllm/clients/claude.rs
@@ -1,15 +1,20 @@
use crate::client_wrapper::TokenUsage;
-use crate::clients::claude::Model::ClaudeSonnet4;
use crate::clients::openai::OpenAIClient;
-use crate::{ClientWrapper, LLMSession, Message, Role};
+use crate::{ClientWrapper, Message};
use async_trait::async_trait;
-use log::{error, info};
use openai_rust2 as openai_rust;
-use openai_rust2::chat::SearchMode;
-use std::env;
use std::error::Error;
use std::sync::Mutex;
-use tokio::runtime::Runtime;
+
+#[cfg(test)]
+use {
+ std::env,
+ tokio::runtime::Runtime,
+ crate::LLMSession,
+ crate::Role,
+ crate::clients::claude::Model::ClaudeSonnet4,
+ log::{error, info},
+};
pub struct ClaudeClient {
delegate_client: OpenAIClient,
diff --git a/src/cloudllm/clients/gemini.rs b/src/cloudllm/clients/gemini.rs
index 74e92e4..75edf11 100644
--- a/src/cloudllm/clients/gemini.rs
+++ b/src/cloudllm/clients/gemini.rs
@@ -1,15 +1,20 @@
use crate::client_wrapper::TokenUsage;
use crate::clients::common::send_and_track;
-use crate::clients::openai::OpenAIClient;
-use crate::{ClientWrapper, LLMSession, Message, Role};
+use crate::clients::http_pool::get_http_client;
+use crate::{ClientWrapper, Message, Role};
use async_trait::async_trait;
-use log::{error, info};
+use log::error;
use openai_rust::chat;
use openai_rust2 as openai_rust;
-use std::env;
-use std::error::Error;
use std::sync::Mutex;
-use tokio::runtime::Runtime;
+
+#[cfg(test)]
+use {
+ std::env,
+ tokio::runtime::Runtime,
+ crate::LLMSession,
+ log::info,
+};
pub struct GeminiClient {
client: openai_rust::Client,
@@ -148,10 +153,14 @@ pub fn model_to_string(model: Model) -> String {
impl GeminiClient {
pub fn new_with_model_string(secret_key: &str, model_name: &str) -> Self {
+ let base_url = "https://generativelanguage.googleapis.com/v1beta/";
+ let http_client = get_http_client(base_url);
+
GeminiClient {
- client: openai_rust::Client::new_with_base_url(
+ client: openai_rust::Client::new_with_client_and_base_url(
secret_key,
- "https://generativelanguage.googleapis.com/v1beta/",
+ http_client,
+ base_url,
),
model: model_name.to_string(),
token_usage: Mutex::new(None),
@@ -165,8 +174,10 @@ impl GeminiClient {
/// This function is used to create a GeminiClient with a custom base URL
/// The default base URL is ""
pub fn new_with_base_url(secret_key: &str, model_name: &str, base_url: &str) -> Self {
+ let http_client = get_http_client(base_url);
+
GeminiClient {
- client: openai_rust::Client::new_with_base_url(secret_key, base_url),
+ client: openai_rust::Client::new_with_client_and_base_url(secret_key, http_client, base_url),
model: model_name.to_string(),
token_usage: Mutex::new(None),
}
diff --git a/src/cloudllm/clients/grok.rs b/src/cloudllm/clients/grok.rs
index 9af32f2..ddd92ab 100644
--- a/src/cloudllm/clients/grok.rs
+++ b/src/cloudllm/clients/grok.rs
@@ -1,15 +1,21 @@
use crate::client_wrapper::TokenUsage;
-use crate::clients::grok::Model::Grok4_0709;
use crate::clients::openai::OpenAIClient;
-use crate::{ClientWrapper, LLMSession, Message, Role};
+use crate::{ClientWrapper, Message};
use async_trait::async_trait;
-use log::{error, info};
use openai_rust2 as openai_rust;
-use openai_rust2::chat::SearchMode;
-use std::env;
use std::error::Error;
use std::sync::Mutex;
-use tokio::runtime::Runtime;
+
+#[cfg(test)]
+use {
+ std::env,
+ tokio::runtime::Runtime,
+ crate::LLMSession,
+ crate::Role,
+ crate::clients::grok::Model::Grok4_0709,
+ openai_rust2::chat::SearchMode,
+ log::{error, info},
+};
pub struct GrokClient {
delegate_client: OpenAIClient,
diff --git a/src/cloudllm/clients/http_pool.rs b/src/cloudllm/clients/http_pool.rs
new file mode 100644
index 0000000..4eece09
--- /dev/null
+++ b/src/cloudllm/clients/http_pool.rs
@@ -0,0 +1,57 @@
+//! HTTP Client Pool for maintaining persistent connections per base URL.
+//!
+//! This module provides a singleton pool of reqwest::Client instances, one per base URL.
+//! This ensures that:
+//! - HTTP connections are reused across multiple requests (connection pooling)
+//! - DNS lookups are minimized
+//! - TLS handshakes are avoided whenever an already-established connection is reused
+//! - TCP connections are kept alive to avoid reconnection overhead
+//!
+//! The reqwest::Client is configured with optimal settings for persistent connections:
+//! - `pool_idle_timeout`: Keeps idle connections alive for 90 seconds
+//! - `pool_max_idle_per_host`: Allows up to 10 idle connections per host
+//! - `tcp_keepalive`: Enables TCP keepalive with a 60-second duration so dead or dropped connections are detected
+
+use once_cell::sync::Lazy;
+use reqwest;
+use std::collections::HashMap;
+use std::sync::Mutex;
+use std::time::Duration;
+
+/// Process-wide cache of HTTP clients keyed by base URL, created lazily on first access.
+static HTTP_CLIENT_POOL: Lazy<Mutex<HashMap<String, reqwest::Client>>> =
+    Lazy::new(|| Mutex::new(HashMap::new()));
+
+/// Returns the shared `reqwest::Client` for `base_url`, creating it on first use.
+///
+/// One client is cached per distinct `base_url` string; later calls with the same
+/// string receive a clone of the cached client rather than a newly built one.
+///
+/// # Arguments
+/// * `base_url` - Key identifying the client; compared as an exact string.
+///
+/// # Panics
+/// Panics if the underlying `reqwest::ClientBuilder` fails to build a client.
+pub fn get_http_client(base_url: &str) -> reqwest::Client {
+    // A panic while the lock is held (e.g. a failed build below) poisons the mutex.
+    // The map is only mutated by a single `insert`, so it is still consistent then;
+    // recover the guard instead of making every later call panic as well.
+    let mut pool = HTTP_CLIENT_POOL
+        .lock()
+        .unwrap_or_else(std::sync::PoisonError::into_inner);
+
+    if let Some(client) = pool.get(base_url) {
+        return client.clone();
+    }
+
+    let client = reqwest::ClientBuilder::new()
+        .pool_idle_timeout(Some(Duration::from_secs(90))) // idle connections expire after 90s
+        .pool_max_idle_per_host(10) // keep at most 10 idle connections per host
+        .tcp_keepalive(Some(Duration::from_secs(60))) // TCP keepalive with a 60s duration
+        .timeout(Duration::from_secs(300)) // 300s total timeout per request
+        .build()
+        .expect("Failed to build HTTP client");
+
+    pool.insert(base_url.to_string(), client.clone());
+    client
+}
diff --git a/src/cloudllm/clients/mod.rs b/src/cloudllm/clients/mod.rs
index 0dd7971..26539f9 100644
--- a/src/cloudllm/clients/mod.rs
+++ b/src/cloudllm/clients/mod.rs
@@ -1,5 +1,6 @@
// src/clients/mod.rs
pub mod common;
+pub mod http_pool;
pub mod claude;
pub mod gemini;
diff --git a/src/cloudllm/clients/openai.rs b/src/cloudllm/clients/openai.rs
index 6b70f7e..1c61a2f 100644
--- a/src/cloudllm/clients/openai.rs
+++ b/src/cloudllm/clients/openai.rs
@@ -7,6 +7,8 @@
//! - **send_message(...)**: unchanged signature; returns a `Message` as before.
//! - **Automatic Usage Capture**: stores the latest `TokenUsage` (input_tokens, output_tokens, total_tokens) internally.
//! - **Inspect Usage**: call `get_last_usage()` after `send_message()` to retrieve actual usage stats.
+//! - **Connection Pooling**: Automatically uses persistent HTTP connections. Multiple `OpenAIClient` instances
+//! with the same base URL share a connection pool, minimizing DNS lookups, TLS handshakes, and TCP overhead.
//!
//! # Example
//!
@@ -41,21 +43,27 @@
//!
//! Make sure `OPENAI_API_KEY` is set and pick a valid model name (e.g. `"gpt-4.1-nano"`).
-use std::env;
use std::error::Error;
use async_trait::async_trait;
-use log::{error, info};
+use log::error;
use openai_rust::chat;
use openai_rust2 as openai_rust;
use crate::client_wrapper::TokenUsage;
use crate::clients::common::send_and_track;
+use crate::clients::http_pool::get_http_client;
use crate::cloudllm::client_wrapper::{ClientWrapper, Message, Role};
use std::sync::Mutex;
-use tokio::runtime::Runtime;
-use crate::clients::openai::Model::GPT5Nano;
-use crate::LLMSession;
+
+#[cfg(test)]
+use {
+ std::env,
+ tokio::runtime::Runtime,
+ crate::LLMSession,
+ crate::clients::openai::Model::GPT5Nano,
+ log::info,
+};
pub enum Model {
GPT5, // Higher Reasoning, Medium speed, Text+Image input, Text output; input $1.25/1M tokens, cached input $0.125/1M tokens, output $10/1M tokens
@@ -119,16 +127,22 @@ impl OpenAIClient {
}
pub fn new_with_model_string(secret_key: &str, model_name: &str) -> Self {
+ // Use default OpenAI base URL
+ let base_url = "https://api.openai.com";
+ let http_client = get_http_client(base_url);
+
OpenAIClient {
- client: openai_rust::Client::new(secret_key),
+ client: openai_rust::Client::new_with_client(secret_key, http_client),
model: model_name.to_string(),
token_usage: Mutex::new(None),
}
}
pub fn new_with_base_url(secret_key: &str, model_name: &str, base_url: &str) -> Self {
+ let http_client = get_http_client(base_url);
+
OpenAIClient {
- client: openai_rust::Client::new_with_base_url(secret_key, base_url),
+ client: openai_rust::Client::new_with_client_and_base_url(secret_key, http_client, base_url),
model: model_name.to_string(),
token_usage: Mutex::new(None),
}
diff --git a/src/lib.rs b/src/lib.rs
index 159432a..28eda2a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,14 @@
//! via client wrappers. For example, `OpenAIClient` serves as a client for OpenAI's ChatGPT, abstracting the interaction
//! specifics and presenting a unified interface.
//!
+//! - **Connection Pooling**: All HTTP clients automatically use persistent connection pooling to minimize latency.
+//! Each base URL (e.g., api.openai.com, api.anthropic.com) maintains its own connection pool with:
+//! - Reused HTTP connections to avoid TCP handshake overhead
+//! - Minimized DNS lookups through connection reuse
+//! - Persistent TLS sessions to skip expensive handshakes
+//! - TCP keepalive to prevent connection timeouts
+//! This reduces per-request latency when many requests are sent to the same provider endpoint.
+//!
//! ## The Road Ahead: LLM-VM Architecture
//!
//! The library is poised to evolve into a more sophisticated toolset with the introduction of the "LLM-VM" architecture.