Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,8 @@ Cargo.lock
*.pdb

.idea/
run_example
run_example

# Temporary test binaries
verify_pooling
/tmp/*.rs
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ openai-rust2 = { version = "1.6.0" }

async-trait = "0.1.88"
log = "0.4.27"
env_logger = "0.11.8"
env_logger = "0.11.8"
once_cell = "1.21"
reqwest = { version = "0.12", features = ["json", "stream"] }
9 changes: 9 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
0.2.13 (Upcoming)
- Implemented HTTP client connection pooling for all providers:
- Created http_pool module that maintains singleton reqwest::Client instances per base URL
- Configured clients with persistent connections (90s idle timeout, 10 max idle per host)
- Added TCP keepalive (60s) to prevent connection drops
- Minimizes DNS lookups, TLS handshakes, and TCP connection overhead
- All clients (OpenAI, Gemini, Claude, Grok) automatically benefit from connection pooling
- Updated documentation to reflect connection pooling behavior

0.2.12 SEP/21/2025
- Added Claude client implementation at src/cloudllm/clients/claude.rs:
- ClaudeClient struct follows the same delegate pattern as GrokClient, using OpenAIClient internally
Expand Down
17 changes: 11 additions & 6 deletions src/cloudllm/clients/claude.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
use crate::client_wrapper::TokenUsage;
use crate::clients::claude::Model::ClaudeSonnet4;
use crate::clients::openai::OpenAIClient;
use crate::{ClientWrapper, LLMSession, Message, Role};
use crate::{ClientWrapper, Message};
use async_trait::async_trait;
use log::{error, info};
use openai_rust2 as openai_rust;
use openai_rust2::chat::SearchMode;
use std::env;
use std::error::Error;
use std::sync::Mutex;
use tokio::runtime::Runtime;

#[cfg(test)]
use {
std::env,
tokio::runtime::Runtime,
crate::LLMSession,
crate::Role,
crate::clients::claude::Model::ClaudeSonnet4,
log::{error, info},
};

pub struct ClaudeClient {
delegate_client: OpenAIClient,
Expand Down
29 changes: 20 additions & 9 deletions src/cloudllm/clients/gemini.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
use crate::client_wrapper::TokenUsage;
use crate::clients::common::send_and_track;
use crate::clients::openai::OpenAIClient;
use crate::{ClientWrapper, LLMSession, Message, Role};
use crate::clients::http_pool::get_http_client;
use crate::{ClientWrapper, Message, Role};
use async_trait::async_trait;
use log::{error, info};
use log::error;
use openai_rust::chat;
use openai_rust2 as openai_rust;
use std::env;
use std::error::Error;
use std::sync::Mutex;
use tokio::runtime::Runtime;

#[cfg(test)]
use {
std::env,
tokio::runtime::Runtime,
crate::LLMSession,
log::info,
};

pub struct GeminiClient {
client: openai_rust::Client,
Expand Down Expand Up @@ -148,10 +153,14 @@ pub fn model_to_string(model: Model) -> String {

impl GeminiClient {
pub fn new_with_model_string(secret_key: &str, model_name: &str) -> Self {
let base_url = "https://generativelanguage.googleapis.com/v1beta/";
let http_client = get_http_client(base_url);

GeminiClient {
client: openai_rust::Client::new_with_base_url(
client: openai_rust::Client::new_with_client_and_base_url(
secret_key,
"https://generativelanguage.googleapis.com/v1beta/",
http_client,
base_url,
),
model: model_name.to_string(),
token_usage: Mutex::new(None),
Expand All @@ -165,8 +174,10 @@ impl GeminiClient {
/// This function is used to create a GeminiClient with a custom base URL
/// The default base URL is "<https://generativelanguage.googleapis.com/v1beta/>"
pub fn new_with_base_url(secret_key: &str, model_name: &str, base_url: &str) -> Self {
let http_client = get_http_client(base_url);

GeminiClient {
client: openai_rust::Client::new_with_base_url(secret_key, base_url),
client: openai_rust::Client::new_with_client_and_base_url(secret_key, http_client, base_url),
model: model_name.to_string(),
token_usage: Mutex::new(None),
}
Expand Down
18 changes: 12 additions & 6 deletions src/cloudllm/clients/grok.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
use crate::client_wrapper::TokenUsage;
use crate::clients::grok::Model::Grok4_0709;
use crate::clients::openai::OpenAIClient;
use crate::{ClientWrapper, LLMSession, Message, Role};
use crate::{ClientWrapper, Message};
use async_trait::async_trait;
use log::{error, info};
use openai_rust2 as openai_rust;
use openai_rust2::chat::SearchMode;
use std::env;
use std::error::Error;
use std::sync::Mutex;
use tokio::runtime::Runtime;

#[cfg(test)]
use {
std::env,
tokio::runtime::Runtime,
crate::LLMSession,
crate::Role,
crate::clients::grok::Model::Grok4_0709,
openai_rust2::chat::SearchMode,
log::{error, info},
};

pub struct GrokClient {
delegate_client: OpenAIClient,
Expand Down
57 changes: 57 additions & 0 deletions src/cloudllm/clients/http_pool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//! HTTP Client Pool for maintaining persistent connections per base URL.
//!
//! This module provides a singleton pool of reqwest::Client instances, one per base URL.
//! This ensures that:
//! - HTTP connections are reused across multiple requests (connection pooling)
//! - DNS lookups are minimized
//! - TLS handshakes are reused where possible
//! - TCP connections are kept alive to avoid reconnection overhead
//!
//! The reqwest::Client is configured with optimal settings for persistent connections:
//! - `pool_idle_timeout`: Keeps idle connections alive for 90 seconds
//! - `pool_max_idle_per_host`: Allows up to 10 idle connections per host
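//! - `tcp_keepalive`: Enables TCP keepalive (`SO_KEEPALIVE`) with a 60-second duration so idle connections are less likely to be dropped silently
//! - `timeout`: Caps every request made through a pooled client at 300 seconds in total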

use once_cell::sync::Lazy;
use reqwest;
use std::collections::HashMap;
use std::sync::Mutex;
use std::time::Duration;

/// Global HTTP client pool, lazily initialized on first access.
///
/// Maps a base URL string to the `reqwest::Client` created for it. The map starts
/// empty and is wrapped in a `Mutex`, so every lookup and insert goes through the lock.
static HTTP_CLIENT_POOL: Lazy<Mutex<HashMap<String, reqwest::Client>>> =
Lazy::new(|| Mutex::new(HashMap::new()));

/// Get or create a shared HTTP client for the given base URL.
///
/// This function maintains a singleton pool of `reqwest::Client` instances.
/// The first call for a given `base_url` builds a client and stores it; later calls
/// with the same string return a clone of the stored client.
///
/// The pool key is the `base_url` string exactly as passed, so two URLs that differ
/// only textually (for example by a trailing slash) get separate clients.
///
/// # Arguments
/// * `base_url` - The base URL for which to get/create an HTTP client
///
/// # Returns
/// A cloned `reqwest::Client` configured for persistent connections
/// (90s idle timeout, up to 10 idle connections per host, 60s TCP keepalive)
/// and a 300s overall request timeout.
///
/// # Panics
/// Panics with "Failed to build HTTP client" if the `reqwest` builder fails.
/// Such a panic does not disable the pool: a poisoned lock is recovered on the
/// next call instead of panicking again.
pub fn get_http_client(base_url: &str) -> reqwest::Client {
    // A poisoned lock only means some thread panicked while holding the guard
    // (e.g. the `expect` below). The map is mutated solely by the final `insert`,
    // so it is never left half-updated; recover the guard rather than turning one
    // failure into a panic on every subsequent call.
    let mut pool = HTTP_CLIENT_POOL
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    // Fast path: look up by `&str` first so a cache hit does not allocate a key.
    if let Some(client) = pool.get(base_url) {
        return client.clone();
    }

    // Create a new client with optimal settings for persistent connections
    let client = reqwest::ClientBuilder::new()
        // Keep idle connections alive for 90 seconds
        .pool_idle_timeout(Some(Duration::from_secs(90)))
        // Allow up to 10 idle connections per host for better throughput
        .pool_max_idle_per_host(10)
        // Enable TCP keepalive to prevent connection drops
        .tcp_keepalive(Some(Duration::from_secs(60)))
        // Set a reasonable timeout for the entire request
        .timeout(Duration::from_secs(300))
        .build()
        .expect("Failed to build HTTP client");

    pool.insert(base_url.to_string(), client.clone());
    client
}
1 change: 1 addition & 0 deletions src/cloudllm/clients/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// src/clients/mod.rs
pub mod common;
pub mod http_pool;

pub mod claude;
pub mod gemini;
Expand Down
28 changes: 21 additions & 7 deletions src/cloudllm/clients/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
//! - **send_message(...)**: unchanged signature; returns a `Message` as before.
//! - **Automatic Usage Capture**: stores the latest `TokenUsage` (input_tokens, output_tokens, total_tokens) internally.
//! - **Inspect Usage**: call `get_last_usage()` after `send_message()` to retrieve actual usage stats.
//! - **Connection Pooling**: Automatically uses persistent HTTP connections. Multiple `OpenAIClient` instances
//! with the same base URL share a connection pool, minimizing DNS lookups, TLS handshakes, and TCP overhead.
//!
//! # Example
//!
Expand Down Expand Up @@ -41,21 +43,27 @@
//!
//! Make sure `OPENAI_API_KEY` is set and pick a valid model name (e.g. `"gpt-4.1-nano"`).

use std::env;
use std::error::Error;

use async_trait::async_trait;
use log::{error, info};
use log::error;
use openai_rust::chat;
use openai_rust2 as openai_rust;

use crate::client_wrapper::TokenUsage;
use crate::clients::common::send_and_track;
use crate::clients::http_pool::get_http_client;
use crate::cloudllm::client_wrapper::{ClientWrapper, Message, Role};
use std::sync::Mutex;
use tokio::runtime::Runtime;
use crate::clients::openai::Model::GPT5Nano;
use crate::LLMSession;

#[cfg(test)]
use {
std::env,
tokio::runtime::Runtime,
crate::LLMSession,
crate::clients::openai::Model::GPT5Nano,
log::info,
};

pub enum Model {
GPT5, // Higher Reasoning, Medium speed, Text+Image input, Text output; input $1.25/1M tokens, cached input $0.125/1M tokens, output $10/1M tokens
Expand Down Expand Up @@ -119,16 +127,22 @@ impl OpenAIClient {
}

pub fn new_with_model_string(secret_key: &str, model_name: &str) -> Self {
// Use default OpenAI base URL
let base_url = "https://api.openai.com";
let http_client = get_http_client(base_url);

OpenAIClient {
client: openai_rust::Client::new(secret_key),
client: openai_rust::Client::new_with_client(secret_key, http_client),
model: model_name.to_string(),
token_usage: Mutex::new(None),
}
}

pub fn new_with_base_url(secret_key: &str, model_name: &str, base_url: &str) -> Self {
let http_client = get_http_client(base_url);

OpenAIClient {
client: openai_rust::Client::new_with_base_url(secret_key, base_url),
client: openai_rust::Client::new_with_client_and_base_url(secret_key, http_client, base_url),
model: model_name.to_string(),
token_usage: Mutex::new(None),
}
Expand Down
8 changes: 8 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@
//! via client wrappers. For example, `OpenAIClient` serves as a client for OpenAI's ChatGPT, abstracting the interaction
//! specifics and presenting a unified interface.
//!
//! - **Connection Pooling**: All HTTP clients automatically use persistent connection pooling to minimize latency.
//! Each base URL (e.g., api.openai.com, api.anthropic.com) maintains its own connection pool with:
//! - Reused HTTP connections to avoid TCP handshake overhead
//! - Minimized DNS lookups through connection reuse
//! - Persistent TLS sessions to skip expensive handshakes
//! - TCP keepalive to prevent connection timeouts
//! This design ensures optimal performance in co-located and distributed deployments.
//!
//! ## The Road Ahead: LLM-VM Architecture
//!
//! The library is poised to evolve into a more sophisticated toolset with the introduction of the "LLM-VM" architecture.
Expand Down