Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ response = edgee.send(
print(response.text) # Text content
print(response.finish_reason) # Finish reason
print(response.tool_calls) # Tool calls (if any)

# Access usage and compression info
if response.usage:
print(f"Tokens used: {response.usage.total_tokens}")

if response.compression:
print(f"Input tokens: {response.compression.input_tokens}")
print(f"Saved tokens: {response.compression.saved_tokens}")
print(f"Compression rate: {response.compression.rate}")
```

## Stream Method
Expand All @@ -64,6 +73,7 @@ for chunk in edgee.stream("gpt-4o", "Tell me a story"):
- ✅ **Streaming** - Real-time response streaming with generators
- ✅ **Tool calling** - Full support for function calling
- ✅ **Flexible input** - Accept strings, dicts, or InputObject
- ✅ **Compression info** - Access token compression metrics in responses
- ✅ **Zero dependencies** - Uses only Python standard library

## Documentation
Expand Down
34 changes: 33 additions & 1 deletion edgee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class InputObject:
tools: list[dict] | None = None
tool_choice: str | dict | None = None
tags: list[str] | None = None
enable_compression: bool | None = (
None # Enable token compression (gateway-internal, not sent to providers)
)
compression_rate: float | None = (
None # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
)


@dataclass
Expand All @@ -62,10 +68,18 @@ class Usage:
total_tokens: int


@dataclass
class Compression:
    """Token-compression metrics for a single request.

    Built from the optional ``compression`` object of the gateway's JSON
    response; when that key is absent, ``SendResponse.compression`` stays
    ``None``.
    """

    input_tokens: int  # Size of the original (pre-compression) input, in tokens
    saved_tokens: int  # Number of input tokens removed by compression
    rate: float  # Gateway-reported compression rate — presumably 0.0-1.0, mirroring the `compression_rate` request option; confirm against gateway docs


@dataclass
class SendResponse:
choices: list[Choice]
usage: Usage | None = None
compression: Compression | None = None

@property
def text(self) -> str | None:
Expand Down Expand Up @@ -190,16 +204,22 @@ def send(
tools = None
tool_choice = None
tags = None
enable_compression = None
compression_rate = None
elif isinstance(input, InputObject):
messages = input.messages
tools = input.tools
tool_choice = input.tool_choice
tags = input.tags
enable_compression = input.enable_compression
compression_rate = input.compression_rate
else:
messages = input.get("messages", [])
tools = input.get("tools")
tool_choice = input.get("tool_choice")
tags = input.get("tags")
enable_compression = input.get("enable_compression")
compression_rate = input.get("compression_rate")

body: dict = {"model": model, "messages": messages}
if stream:
Expand All @@ -210,6 +230,10 @@ def send(
body["tool_choice"] = tool_choice
if tags:
body["tags"] = tags
if enable_compression is not None:
body["enable_compression"] = enable_compression
if compression_rate is not None:
body["compression_rate"] = compression_rate

request = Request(
f"{self.base_url}{API_ENDPOINT}",
Expand Down Expand Up @@ -252,7 +276,15 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
total_tokens=data["usage"]["total_tokens"],
)

return SendResponse(choices=choices, usage=usage)
compression = None
if "compression" in data:
compression = Compression(
input_tokens=data["compression"]["input_tokens"],
saved_tokens=data["compression"]["saved_tokens"],
rate=data["compression"]["rate"],
)

return SendResponse(choices=choices, usage=usage, compression=compression)

def _handle_streaming_response(self, request: Request):
"""Handle streaming response, yielding StreamChunk objects."""
Expand Down
135 changes: 135 additions & 0 deletions example/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Example: Token compression with Edgee Gateway SDK

This example demonstrates how to:
1. Enable compression for a request with a large input context
2. Set a custom compression rate
3. Access compression metrics from the response

IMPORTANT: Only USER messages are compressed. System messages are not compressed.
This example includes a large context in the user message to demonstrate meaningful
compression savings.
"""

import os
import sys

# Add parent directory to path for local testing
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from edgee import Edgee

# Initialize the client
edgee = Edgee(os.environ.get("EDGEE_API_KEY"))

# Large context document to demonstrate input compression
LARGE_CONTEXT = """
The History and Impact of Artificial Intelligence

Artificial intelligence (AI) has evolved from a theoretical concept to a
transformative technology that influences nearly every aspect of modern life.
The field began in earnest in the 1950s when pioneers like Alan Turing and
John McCarthy laid the groundwork for machine intelligence.

Early developments focused on symbolic reasoning and expert systems. These
rule-based approaches dominated the field through the 1970s and 1980s, with
systems like MYCIN demonstrating practical applications in medical diagnosis.
However, these early systems were limited by their inability to learn from data
and adapt to new situations.

The resurgence of neural networks in the 1980s and 1990s, particularly with
backpropagation algorithms, opened new possibilities. Yet it wasn't until the
2010s, with the advent of deep learning and the availability of massive datasets
and computational power, that AI truly began to revolutionize industries.

Modern AI applications span numerous domains:
- Natural language processing enables machines to understand and generate human language
- Computer vision allows machines to interpret visual information from the world
- Robotics combines AI with mechanical systems for autonomous operation
- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
- Transportation is being transformed by autonomous vehicles and traffic optimization

The development of large language models like GPT, BERT, and others has
particularly accelerated progress in natural language understanding and generation.
These models, trained on vast amounts of text data, can perform a wide range of
language tasks with remarkable proficiency.

Despite remarkable progress, significant challenges remain. Issues of bias,
interpretability, safety, and ethical considerations continue to be areas of
active research and debate. The AI community is working to ensure that these
powerful technologies are developed and deployed responsibly, with consideration
for their societal impact.

Looking forward, AI is expected to continue advancing rapidly, with potential
breakthroughs in areas like artificial general intelligence, quantum machine
learning, and brain-computer interfaces. The integration of AI into daily life
will likely deepen, raising important questions about human-AI collaboration,
workforce transformation, and the future of human cognition itself.
"""

print("=" * 70)
print("Edgee Token Compression Example")
print("=" * 70)
print()

# Example: Request with compression enabled and large input
print("Example: Large user message with compression enabled")
print("-" * 70)
print(f"Input context length: {len(LARGE_CONTEXT)} characters")
print()

# NOTE: Only USER messages are compressed
# Put the large context in the user message to demonstrate compression
user_message = f"""Here is some context about AI:

{LARGE_CONTEXT}

Based on this context, summarize the key milestones in AI development in 3 bullet points."""

response = edgee.send(
model="gpt-4o",
input={
"messages": [
{"role": "user", "content": user_message},
],
"enable_compression": True,
"compression_rate": 0.5,
},
)

print(f"Response: {response.text}")
print()

# Display usage information
if response.usage:
print("Token Usage:")
print(f" Prompt tokens: {response.usage.prompt_tokens}")
print(f" Completion tokens: {response.usage.completion_tokens}")
print(f" Total tokens: {response.usage.total_tokens}")
print()

# Display compression information
if response.compression:
print("Compression Metrics:")
print(f" Input tokens: {response.compression.input_tokens}")
print(f" Saved tokens: {response.compression.saved_tokens}")
print(f" Compression rate: {response.compression.rate:.2%}")
savings_pct = (
(response.compression.saved_tokens / response.compression.input_tokens * 100)
if response.compression.input_tokens > 0
else 0
)
print(f" Savings: {savings_pct:.1f}% of input tokens saved!")
print()
print(" 💡 Without compression, this request would have used")
print(f" {response.compression.input_tokens} input tokens.")
print(
f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
)
else:
print("No compression data available in response.")
print("Note: Compression data is only returned when compression is enabled")
print(" and supported by your API key configuration.")

print()
print("=" * 70)
47 changes: 47 additions & 0 deletions tests/test_edgee.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,50 @@ def test_config_base_url_overrides_env(self, mock_urlopen):

call_args = mock_urlopen.call_args[0][0]
assert call_args.full_url == f"{config_base_url}/v1/chat/completions"

@patch("edgee.urlopen")
def test_send_with_compression_response(self, mock_urlopen):
"""Should handle response with compression field"""
mock_response_data = {
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "Response"},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
"compression": {
"input_tokens": 100,
"saved_tokens": 42,
"rate": 0.6102003642987249,
},
}
mock_urlopen.return_value = self._mock_response(mock_response_data)

client = Edgee("test-api-key")
result = client.send(model="gpt-4", input="Test")

assert result.compression is not None
assert result.compression.input_tokens == 100
assert result.compression.saved_tokens == 42
assert result.compression.rate == 0.6102003642987249

@patch("edgee.urlopen")
def test_send_without_compression_response(self, mock_urlopen):
"""Should handle response without compression field"""
mock_response_data = {
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "Response"},
"finish_reason": "stop",
}
],
}
mock_urlopen.return_value = self._mock_response(mock_response_data)

client = Edgee("test-api-key")
result = client.send(model="gpt-4", input="Test")

assert result.compression is None
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading