From cebbc2b5da7c5f1d7f6ee8f0e8c332c854a49f38 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Tue, 3 Feb 2026 17:56:13 +0100
Subject: [PATCH 1/9] feat: add optional compression fields to completion
 requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for compression configuration with optional fields that can be
passed per request to override API key settings.

- Add optional `enable_compression: bool | None` field to `InputObject` dataclass
- Add optional `compression_rate: float | None` field to `InputObject` dataclass
- Include compression fields in request body when provided via InputObject or dict input
- Compression fields are omitted from request when not provided
- Fields are gateway-internal and not sent to providers

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 edgee/__init__.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/edgee/__init__.py b/edgee/__init__.py
index 65f5dc2..6fec3db 100644
--- a/edgee/__init__.py
+++ b/edgee/__init__.py
@@ -46,6 +46,8 @@ class InputObject:
     tools: list[dict] | None = None
     tool_choice: str | dict | None = None
     tags: list[str] | None = None
+    enable_compression: bool | None = None  # Enable token compression (gateway-internal, not sent to providers)
+    compression_rate: float | None = None  # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
 
 
 @dataclass
@@ -190,16 +192,22 @@ def send(
             tools = None
             tool_choice = None
             tags = None
+            enable_compression = None
+            compression_rate = None
         elif isinstance(input, InputObject):
             messages = input.messages
             tools = input.tools
             tool_choice = input.tool_choice
             tags = input.tags
+            enable_compression = input.enable_compression
+            compression_rate = input.compression_rate
         else:
             messages = input.get("messages", [])
             tools = input.get("tools")
             tool_choice = input.get("tool_choice")
             tags = input.get("tags")
+            enable_compression = input.get("enable_compression")
+            compression_rate = input.get("compression_rate")
 
         body: dict = {"model": model, "messages": messages}
         if stream:
@@ -210,6 +218,10 @@ def send(
             body["tool_choice"] = tool_choice
         if tags:
             body["tags"] = tags
+        if enable_compression is not None:
+            body["enable_compression"] = enable_compression
+        if compression_rate is not None:
+            body["compression_rate"] = compression_rate
 
         request = Request(
             f"{self.base_url}{API_ENDPOINT}",

From c483db765272d889b03ad40366857d7ea4bb7acb Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Tue, 3 Feb 2026 18:03:51 +0100
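Subject: [PATCH 2/9] style: apply ruff formatting

- Wrap the long `enable_compression` and `compression_rate` field defaults in `InputObject`
- Update the `edgee` package version recorded in `uv.lock` from 0.1.1 to 1.0.0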
---
 edgee/__init__.py | 8 ++++++--
 uv.lock           | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/edgee/__init__.py b/edgee/__init__.py
index 6fec3db..653a4df 100644
--- a/edgee/__init__.py
+++ b/edgee/__init__.py
@@ -46,8 +46,12 @@ class InputObject:
     tools: list[dict] | None = None
     tool_choice: str | dict | None = None
     tags: list[str] | None = None
-    enable_compression: bool | None = None  # Enable token compression (gateway-internal, not sent to providers)
-    compression_rate: float | None = None  # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
+    enable_compression: bool | None = (
+        None  # Enable token compression (gateway-internal, not sent to providers)
+    )
+    compression_rate: float | None = (
+        None  # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
+    )
 
 
 @dataclass
diff --git a/uv.lock b/uv.lock
index ccc443e..8842f95 100644
--- a/uv.lock
+++ b/uv.lock
@@ -13,7 +13,7 @@ wheels = [
 
 [[package]]
 name = "edgee"
-version = "0.1.1"
+version = "1.0.0"
 source = { editable = "." }
 
 [package.optional-dependencies]

From db38a5cf06a4f3e33ac53f8b9f4d3725486ca80c Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 15:48:54 +0100
Subject: [PATCH 3/9] feat: add compression response field to SendResponse

- Add Compression dataclass with input_tokens, saved_tokens, and rate fields
- Add optional compression field to SendResponse for root-level compression data
---
 edgee/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/edgee/__init__.py b/edgee/__init__.py
index 653a4df..40cd434 100644
--- a/edgee/__init__.py
+++ b/edgee/__init__.py
@@ -68,10 +68,18 @@ class Usage:
     total_tokens: int
 
 
+@dataclass
+class Compression:
+    input_tokens: int
+    saved_tokens: int
+    rate: float
+
+
 @dataclass
 class SendResponse:
     choices: list[Choice]
     usage: Usage | None = None
+    compression: Compression | None = None
 
     @property
     def text(self) -> str | None:

From 9b320ffded5ed61ebce4a1978655c6cb295cd6a4 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:04:16 +0100
Subject: [PATCH 4/9] feat: parse compression response field and add tests

- Add test for response with compression field
- Add test for response without compression field
- Update _handle_non_streaming_response to parse compression data
---
 edgee/__init__.py   | 10 +++++++++-
 tests/test_edgee.py | 47 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/edgee/__init__.py b/edgee/__init__.py
index 40cd434..b8d5520 100644
--- a/edgee/__init__.py
+++ b/edgee/__init__.py
@@ -276,7 +276,15 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
                 total_tokens=data["usage"]["total_tokens"],
             )
 
-        return SendResponse(choices=choices, usage=usage)
+        compression = None
+        if "compression" in data:
+            compression = Compression(
+                input_tokens=data["compression"]["input_tokens"],
+                saved_tokens=data["compression"]["saved_tokens"],
+                rate=data["compression"]["rate"],
+            )
+
+        return SendResponse(choices=choices, usage=usage, compression=compression)
 
     def _handle_streaming_response(self, request: Request):
         """Handle streaming response, yielding StreamChunk objects."""
diff --git a/tests/test_edgee.py b/tests/test_edgee.py
index 048a6b7..520d714 100644
--- a/tests/test_edgee.py
+++ b/tests/test_edgee.py
@@ -306,3 +306,50 @@ def test_config_base_url_overrides_env(self, mock_urlopen):
 
         call_args = mock_urlopen.call_args[0][0]
         assert call_args.full_url == f"{config_base_url}/v1/chat/completions"
+
+    @patch("edgee.urlopen")
+    def test_send_with_compression_response(self, mock_urlopen):
+        """Should handle response with compression field"""
+        mock_response_data = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": "Response"},
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
+            "compression": {
+                "input_tokens": 100,
+                "saved_tokens": 42,
+                "rate": 0.6102003642987249,
+            },
+        }
+        mock_urlopen.return_value = self._mock_response(mock_response_data)
+
+        client = Edgee("test-api-key")
+        result = client.send(model="gpt-4", input="Test")
+
+        assert result.compression is not None
+        assert result.compression.input_tokens == 100
+        assert result.compression.saved_tokens == 42
+        assert result.compression.rate == 0.6102003642987249
+
+    @patch("edgee.urlopen")
+    def test_send_without_compression_response(self, mock_urlopen):
+        """Should handle response without compression field"""
+        mock_response_data = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": "Response"},
+                    "finish_reason": "stop",
+                }
+            ],
+        }
+        mock_urlopen.return_value = self._mock_response(mock_response_data)
+
+        client = Edgee("test-api-key")
+        result = client.send(model="gpt-4", input="Test")
+
+        assert result.compression is None

From 0ba06cd33f69724cdba3e4ece6e766d0334b4d4e Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:15:36 +0100
Subject: [PATCH 5/9] docs: add compression field documentation to README

- Add example showing how to access compression data in responses
- Add compression info to features list
---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index 86fcc3c..b179649 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,15 @@ response = edgee.send(
 print(response.text)           # Text content
 print(response.finish_reason)  # Finish reason
 print(response.tool_calls)     # Tool calls (if any)
+
+# Access usage and compression info
+if response.usage:
+    print(f"Tokens used: {response.usage.total_tokens}")
+
+if response.compression:
+    print(f"Input tokens: {response.compression.input_tokens}")
+    print(f"Saved tokens: {response.compression.saved_tokens}")
+    print(f"Compression rate: {response.compression.rate}")
 ```
 
 ## Stream Method
@@ -64,6 +73,7 @@ for chunk in edgee.stream("gpt-4o", "Tell me a story"):
 - ✅ **Streaming** - Real-time response streaming with generators
 - ✅ **Tool calling** - Full support for function calling
 - ✅ **Flexible input** - Accept strings, dicts, or InputObject
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero dependencies** - Uses only Python standard library
 
 ## Documentation

From ea3c90953b6699a1200834445be538a2268a8d29 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:23:06 +0100
Subject: [PATCH 6/9] docs: add compression example

- Add example showing how to enable compression and set compression rate
- Demonstrate accessing compression metrics from response
- Show usage information alongside compression data
---
 example/compression.py | 61 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 example/compression.py

diff --git a/example/compression.py b/example/compression.py
new file mode 100644
index 0000000..c24cf39
--- /dev/null
+++ b/example/compression.py
@@ -0,0 +1,61 @@
+"""Example: Token compression with Edgee Gateway SDK
+
+This example demonstrates how to:
+1. Enable compression for a request
+2. Set a custom compression rate
+3. Access compression metrics from the response
+"""
+
+import os
+import sys
+
+# Add parent directory to path for local testing
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from edgee import Edgee
+
+# Initialize the client
+edgee = Edgee(os.environ.get("EDGEE_API_KEY"))
+
+print("=" * 70)
+print("Edgee Token Compression Example")
+print("=" * 70)
+print()
+
+# Example 1: Request with compression enabled
+print("Example 1: Request with compression enabled")
+print("-" * 70)
+response = edgee.send(
+    model="gpt-4o",
+    input={
+        "messages": [{"role": "user", "content": "Explain quantum computing in simple terms."}],
+        "enable_compression": True,
+        "compression_rate": 0.5,
+    },
+)
+
+print(f"Response: {response.text}")
+print()
+
+# Display usage information
+if response.usage:
+    print("Token Usage:")
+    print(f"  Prompt tokens:     {response.usage.prompt_tokens}")
+    print(f"  Completion tokens: {response.usage.completion_tokens}")
+    print(f"  Total tokens:      {response.usage.total_tokens}")
+    print()
+
+# Display compression information
+if response.compression:
+    print("Compression Metrics:")
+    print(f"  Input tokens:  {response.compression.input_tokens}")
+    print(f"  Saved tokens:  {response.compression.saved_tokens}")
+    print(f"  Compression rate: {response.compression.rate:.2%}")
+    print(f"  Token savings: {response.compression.saved_tokens} tokens saved!")
+else:
+    print("No compression data available in response.")
+    print("Note: Compression data is only returned when compression is enabled")
+    print("      and supported by your API key configuration.")
+
+print()
+print("=" * 70)

From a60858348af85617ac96f87394050a833f64f33e Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:26:12 +0100
Subject: [PATCH 7/9] docs: update compression example with larger input
 context

- Add substantial AI history document as context (roughly 2,500 characters)
- Demonstrate meaningful compression on large input
- Show percentage of tokens saved
- Explain that compression works on input tokens
---
 example/compression.py | 79 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 74 insertions(+), 5 deletions(-)

diff --git a/example/compression.py b/example/compression.py
index c24cf39..a9d2ceb 100644
--- a/example/compression.py
+++ b/example/compression.py
@@ -1,9 +1,12 @@
 """Example: Token compression with Edgee Gateway SDK
 
 This example demonstrates how to:
-1. Enable compression for a request
+1. Enable compression for a request with a large input context
 2. Set a custom compression rate
 3. Access compression metrics from the response
+
+Note: Compression works on INPUT tokens, so this example includes a large
+context document to demonstrate meaningful compression savings.
 """
 
 import os
@@ -17,18 +20,73 @@
 # Initialize the client
 edgee = Edgee(os.environ.get("EDGEE_API_KEY"))
 
+# Large context document to demonstrate input compression
+LARGE_CONTEXT = """
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a 
+transformative technology that influences nearly every aspect of modern life. 
+The field began in earnest in the 1950s when pioneers like Alan Turing and 
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These 
+rule-based approaches dominated the field through the 1970s and 1980s, with 
+systems like MYCIN demonstrating practical applications in medical diagnosis. 
+However, these early systems were limited by their inability to learn from data 
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with 
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the 
+2010s, with the advent of deep learning and the availability of massive datasets 
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has 
+particularly accelerated progress in natural language understanding and generation. 
+These models, trained on vast amounts of text data, can perform a wide range of 
+language tasks with remarkable proficiency.
+
+Despite remarkable progress, significant challenges remain. Issues of bias, 
+interpretability, safety, and ethical considerations continue to be areas of 
+active research and debate. The AI community is working to ensure that these 
+powerful technologies are developed and deployed responsibly, with consideration 
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential 
+breakthroughs in areas like artificial general intelligence, quantum machine 
+learning, and brain-computer interfaces. The integration of AI into daily life 
+will likely deepen, raising important questions about human-AI collaboration, 
+workforce transformation, and the future of human cognition itself.
+"""
+
 print("=" * 70)
 print("Edgee Token Compression Example")
 print("=" * 70)
 print()
 
-# Example 1: Request with compression enabled
-print("Example 1: Request with compression enabled")
+# Example: Request with compression enabled and large input
+print("Example: Large context with compression enabled")
 print("-" * 70)
+print(f"Input context length: {len(LARGE_CONTEXT)} characters")
+print()
+
 response = edgee.send(
     model="gpt-4o",
     input={
-        "messages": [{"role": "user", "content": "Explain quantum computing in simple terms."}],
+        "messages": [
+            {"role": "system", "content": LARGE_CONTEXT},
+            {
+                "role": "user",
+                "content": "Based on the context above, summarize the key milestones in AI development in 3 bullet points.",
+            },
+        ],
         "enable_compression": True,
         "compression_rate": 0.5,
     },
@@ -51,7 +109,18 @@
     print(f"  Input tokens:  {response.compression.input_tokens}")
     print(f"  Saved tokens:  {response.compression.saved_tokens}")
     print(f"  Compression rate: {response.compression.rate:.2%}")
-    print(f"  Token savings: {response.compression.saved_tokens} tokens saved!")
+    savings_pct = (
+        (response.compression.saved_tokens / response.compression.input_tokens * 100)
+        if response.compression.input_tokens > 0
+        else 0
+    )
+    print(f"  Savings: {savings_pct:.1f}% of input tokens saved!")
+    print()
+    print(f"  💡 Without compression, this request would have used")
+    print(f"     {response.compression.input_tokens} input tokens.")
+    print(
+        f"     With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
+    )
 else:
     print("No compression data available in response.")
     print("Note: Compression data is only returned when compression is enabled")

From 0cdb016572697f121e52be498391d1431c68a503 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:35:03 +0100
Subject: [PATCH 8/9] fix: move large context to user message in compression
 example

- Only USER messages are compressed, not system messages
- Update example to put context in user message
- Add clarifying comment about compression behavior
---
 example/compression.py | 63 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/example/compression.py b/example/compression.py
index a9d2ceb..814be70 100644
--- a/example/compression.py
+++ b/example/compression.py
@@ -5,8 +5,9 @@
 2. Set a custom compression rate
 3. Access compression metrics from the response
 
-Note: Compression works on INPUT tokens, so this example includes a large
-context document to demonstrate meaningful compression savings.
+IMPORTANT: Only USER messages are compressed. System messages are not compressed.
+This example includes a large context in the user message to demonstrate meaningful
+compression savings.
 """
 
 import os
@@ -24,20 +25,20 @@
 LARGE_CONTEXT = """
 The History and Impact of Artificial Intelligence
 
-Artificial intelligence (AI) has evolved from a theoretical concept to a 
-transformative technology that influences nearly every aspect of modern life. 
-The field began in earnest in the 1950s when pioneers like Alan Turing and 
+Artificial intelligence (AI) has evolved from a theoretical concept to a
+transformative technology that influences nearly every aspect of modern life.
+The field began in earnest in the 1950s when pioneers like Alan Turing and
 John McCarthy laid the groundwork for machine intelligence.
 
-Early developments focused on symbolic reasoning and expert systems. These 
-rule-based approaches dominated the field through the 1970s and 1980s, with 
-systems like MYCIN demonstrating practical applications in medical diagnosis. 
-However, these early systems were limited by their inability to learn from data 
+Early developments focused on symbolic reasoning and expert systems. These
+rule-based approaches dominated the field through the 1970s and 1980s, with
+systems like MYCIN demonstrating practical applications in medical diagnosis.
+However, these early systems were limited by their inability to learn from data
 and adapt to new situations.
 
-The resurgence of neural networks in the 1980s and 1990s, particularly with 
-backpropagation algorithms, opened new possibilities. Yet it wasn't until the 
-2010s, with the advent of deep learning and the availability of massive datasets 
+The resurgence of neural networks in the 1980s and 1990s, particularly with
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the
+2010s, with the advent of deep learning and the availability of massive datasets
 and computational power, that AI truly began to revolutionize industries.
 
 Modern AI applications span numerous domains:
@@ -48,21 +49,21 @@
 - Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
 - Transportation is being transformed by autonomous vehicles and traffic optimization
 
-The development of large language models like GPT, BERT, and others has 
-particularly accelerated progress in natural language understanding and generation. 
-These models, trained on vast amounts of text data, can perform a wide range of 
+The development of large language models like GPT, BERT, and others has
+particularly accelerated progress in natural language understanding and generation.
+These models, trained on vast amounts of text data, can perform a wide range of
 language tasks with remarkable proficiency.
 
-Despite remarkable progress, significant challenges remain. Issues of bias, 
-interpretability, safety, and ethical considerations continue to be areas of 
-active research and debate. The AI community is working to ensure that these 
-powerful technologies are developed and deployed responsibly, with consideration 
+Despite remarkable progress, significant challenges remain. Issues of bias,
+interpretability, safety, and ethical considerations continue to be areas of
+active research and debate. The AI community is working to ensure that these
+powerful technologies are developed and deployed responsibly, with consideration
 for their societal impact.
 
-Looking forward, AI is expected to continue advancing rapidly, with potential 
-breakthroughs in areas like artificial general intelligence, quantum machine 
-learning, and brain-computer interfaces. The integration of AI into daily life 
-will likely deepen, raising important questions about human-AI collaboration, 
+Looking forward, AI is expected to continue advancing rapidly, with potential
+breakthroughs in areas like artificial general intelligence, quantum machine
+learning, and brain-computer interfaces. The integration of AI into daily life
+will likely deepen, raising important questions about human-AI collaboration,
 workforce transformation, and the future of human cognition itself.
 """
 
@@ -72,20 +73,24 @@
 print()
 
 # Example: Request with compression enabled and large input
-print("Example: Large context with compression enabled")
+print("Example: Large user message with compression enabled")
 print("-" * 70)
 print(f"Input context length: {len(LARGE_CONTEXT)} characters")
 print()
 
+# NOTE: Only USER messages are compressed
+# Put the large context in the user message to demonstrate compression
+user_message = f"""Here is some context about AI:
+
+{LARGE_CONTEXT}
+
+Based on this context, summarize the key milestones in AI development in 3 bullet points."""
+
 response = edgee.send(
     model="gpt-4o",
     input={
         "messages": [
-            {"role": "system", "content": LARGE_CONTEXT},
-            {
-                "role": "user",
-                "content": "Based on the context above, summarize the key milestones in AI development in 3 bullet points.",
-            },
+            {"role": "user", "content": user_message},
         ],
         "enable_compression": True,
         "compression_rate": 0.5,

From f4e9f2b8fcf16912809d073a14f161a7c9e48667 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:37:48 +0100
Subject: [PATCH 9/9] style: remove placeholder-less f-string flagged by ruff

---
 example/compression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example/compression.py b/example/compression.py
index 814be70..d446279 100644
--- a/example/compression.py
+++ b/example/compression.py
@@ -121,7 +121,7 @@
     )
     print(f"  Savings: {savings_pct:.1f}% of input tokens saved!")
     print()
-    print(f"  💡 Without compression, this request would have used")
+    print("  💡 Without compression, this request would have used")
     print(f"     {response.compression.input_tokens} input tokens.")
     print(
         f"     With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"