From cebbc2b5da7c5f1d7f6ee8f0e8c332c854a49f38 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Tue, 3 Feb 2026 17:56:13 +0100 Subject: [PATCH 1/9] feat: add optional compression fields to completion requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for compression configuration with optional fields that can be passed per request to override API key settings. - Add optional `enable_compression: bool | None` field to `InputObject` dataclass - Add optional `compression_rate: float | None` field to `InputObject` dataclass - Include compression fields in request body when provided via InputObject or dict input - Compression fields are omitted from request when not provided - Fields are gateway-internal and not sent to providers 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- edgee/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/edgee/__init__.py b/edgee/__init__.py index 65f5dc2..6fec3db 100644 --- a/edgee/__init__.py +++ b/edgee/__init__.py @@ -46,6 +46,8 @@ class InputObject: tools: list[dict] | None = None tool_choice: str | dict | None = None tags: list[str] | None = None + enable_compression: bool | None = None # Enable token compression (gateway-internal, not sent to providers) + compression_rate: float | None = None # Compression rate 0.0-1.0 (gateway-internal, not sent to providers) @dataclass @@ -190,16 +192,22 @@ def send( tools = None tool_choice = None tags = None + enable_compression = None + compression_rate = None elif isinstance(input, InputObject): messages = input.messages tools = input.tools tool_choice = input.tool_choice tags = input.tags + enable_compression = input.enable_compression + compression_rate = input.compression_rate else: messages = input.get("messages", []) tools = input.get("tools") tool_choice = input.get("tool_choice") tags = input.get("tags") + enable_compression = input.get("enable_compression") + compression_rate = input.get("compression_rate") body: dict = {"model": model, "messages": messages} if stream: @@ -210,6 +218,10 @@ def send( body["tool_choice"] = tool_choice if tags: body["tags"] = tags + if enable_compression is not None: + body["enable_compression"] = enable_compression + if compression_rate is not None: + body["compression_rate"] = compression_rate request = Request( f"{self.base_url}{API_ENDPOINT}", From c483db765272d889b03ad40366857d7ea4bb7acb Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Tue, 3 Feb 2026 18:03:51 +0100 Subject: [PATCH 2/9] ruff --- edgee/__init__.py | 8 ++++++-- uv.lock | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/edgee/__init__.py b/edgee/__init__.py index 6fec3db..653a4df 100644 --- a/edgee/__init__.py +++ b/edgee/__init__.py @@ -46,8 +46,12 @@ class InputObject: tools: list[dict] | None = None tool_choice: str | dict | None = None tags: list[str] | None = None - enable_compression: bool | None = None # Enable token compression (gateway-internal, not sent to providers) - compression_rate: float | None = None # Compression rate 0.0-1.0 (gateway-internal, not sent to providers) + enable_compression: bool | None = ( + None # Enable token compression (gateway-internal, not sent to providers) + ) + compression_rate: float | None = ( + None # Compression rate 0.0-1.0 (gateway-internal, not sent to providers) + ) @dataclass diff --git a/uv.lock b/uv.lock index ccc443e..8842f95 100644 --- a/uv.lock +++ b/uv.lock @@ -13,7 +13,7 @@ wheels = [ [[package]] name = "edgee" -version = "0.1.1" +version = "1.0.0" source = { editable = "." } [package.optional-dependencies] From db38a5cf06a4f3e33ac53f8b9f4d3725486ca80c Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 15:48:54 +0100 Subject: [PATCH 3/9] feat: add compression response field to SendResponse - Add Compression dataclass with input_tokens, saved_tokens, and rate fields - Add optional compression field to SendResponse for root-level compression data --- edgee/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/edgee/__init__.py b/edgee/__init__.py index 653a4df..40cd434 100644 --- a/edgee/__init__.py +++ b/edgee/__init__.py @@ -68,10 +68,18 @@ class Usage: total_tokens: int +@dataclass +class Compression: + input_tokens: int + saved_tokens: int + rate: float + + @dataclass class SendResponse: choices: list[Choice] usage: Usage | None = None + compression: Compression | None = None @property def text(self) -> str | None: From 9b320ffded5ed61ebce4a1978655c6cb295cd6a4 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:04:16 +0100 Subject: [PATCH 4/9] test: add compression response field tests - Add test for response with compression field - Add test for response without compression field - Update _handle_non_streaming_response to parse compression data --- edgee/__init__.py | 10 +++++++++- tests/test_edgee.py | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/edgee/__init__.py b/edgee/__init__.py index 40cd434..b8d5520 100644 --- a/edgee/__init__.py +++ b/edgee/__init__.py @@ -276,7 +276,15 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse: total_tokens=data["usage"]["total_tokens"], ) - return SendResponse(choices=choices, usage=usage) + compression = None + if "compression" in data: + compression = Compression( + input_tokens=data["compression"]["input_tokens"], + saved_tokens=data["compression"]["saved_tokens"], + rate=data["compression"]["rate"], + ) + + return SendResponse(choices=choices, usage=usage, compression=compression) def _handle_streaming_response(self, request: Request): """Handle streaming response, yielding StreamChunk objects.""" diff --git a/tests/test_edgee.py b/tests/test_edgee.py index 048a6b7..520d714 100644 --- a/tests/test_edgee.py +++ b/tests/test_edgee.py @@ -306,3 +306,50 @@ def test_config_base_url_overrides_env(self, mock_urlopen): call_args = mock_urlopen.call_args[0][0] assert call_args.full_url == f"{config_base_url}/v1/chat/completions" + + @patch("edgee.urlopen") + def test_send_with_compression_response(self, mock_urlopen): + """Should handle response with compression field""" + mock_response_data = { + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Response"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}, + "compression": { + "input_tokens": 100, + "saved_tokens": 42, + "rate": 0.6102003642987249, + }, + } + mock_urlopen.return_value = self._mock_response(mock_response_data) + + client = Edgee("test-api-key") + result = client.send(model="gpt-4", input="Test") + + assert result.compression is not None + assert result.compression.input_tokens == 100 + assert result.compression.saved_tokens == 42 + assert result.compression.rate == 0.6102003642987249 + + @patch("edgee.urlopen") + def test_send_without_compression_response(self, mock_urlopen): + """Should handle response without compression field""" + mock_response_data = { + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Response"}, + "finish_reason": "stop", + } + ], + } + mock_urlopen.return_value = self._mock_response(mock_response_data) + + client = Edgee("test-api-key") + result = client.send(model="gpt-4", input="Test") + + assert result.compression is None From 0ba06cd33f69724cdba3e4ece6e766d0334b4d4e Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:15:36 +0100 Subject: [PATCH 5/9] docs: add compression field documentation to README - Add example showing how to access compression data in responses - Add compression info to features list --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 86fcc3c..b179649 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,15 @@ response = edgee.send( print(response.text) # Text content print(response.finish_reason) # Finish reason print(response.tool_calls) # Tool calls (if any) + +# Access usage and compression info +if response.usage: + print(f"Tokens used: {response.usage.total_tokens}") + +if response.compression: + print(f"Input tokens: {response.compression.input_tokens}") + print(f"Saved tokens: {response.compression.saved_tokens}") + print(f"Compression rate: {response.compression.rate}") ``` ## Stream Method @@ -64,6 +73,7 @@ for chunk in edgee.stream("gpt-4o", "Tell me a story"): - ✅ **Streaming** - Real-time response streaming with generators - ✅ **Tool calling** - Full support for function calling - ✅ **Flexible input** - Accept strings, dicts, or InputObject +- ✅ **Compression info** - Access token compression metrics in responses - ✅ **Zero dependencies** - Uses only Python standard library ## Documentation From ea3c90953b6699a1200834445be538a2268a8d29 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:23:06 +0100 Subject: [PATCH 6/9] docs: add compression example - Add example showing how to enable compression and set compression rate - Demonstrate accessing compression metrics from response - Show usage information alongside compression data --- example/compression.py | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 example/compression.py diff --git a/example/compression.py b/example/compression.py new file mode 100644 index 0000000..c24cf39 --- /dev/null +++ b/example/compression.py @@ -0,0 +1,61 @@ +"""Example: Token compression with Edgee Gateway SDK + +This example demonstrates how to: +1. Enable compression for a request +2. Set a custom compression rate +3. Access compression metrics from the response +""" + +import os +import sys + +# Add parent directory to path for local testing +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from edgee import Edgee + +# Initialize the client +edgee = Edgee(os.environ.get("EDGEE_API_KEY")) + +print("=" * 70) +print("Edgee Token Compression Example") +print("=" * 70) +print() + +# Example 1: Request with compression enabled +print("Example 1: Request with compression enabled") +print("-" * 70) +response = edgee.send( + model="gpt-4o", + input={ + "messages": [{"role": "user", "content": "Explain quantum computing in simple terms."}], + "enable_compression": True, + "compression_rate": 0.5, + }, +) + +print(f"Response: {response.text}") +print() + +# Display usage information +if response.usage: + print("Token Usage:") + print(f" Prompt tokens: {response.usage.prompt_tokens}") + print(f" Completion tokens: {response.usage.completion_tokens}") + print(f" Total tokens: {response.usage.total_tokens}") + print() + +# Display compression information +if response.compression: + print("Compression Metrics:") + print(f" Input tokens: {response.compression.input_tokens}") + print(f" Saved tokens: {response.compression.saved_tokens}") + print(f" Compression rate: {response.compression.rate:.2%}") + print(f" Token savings: {response.compression.saved_tokens} tokens saved!") +else: + print("No compression data available in response.") + print("Note: Compression data is only returned when compression is enabled") + print(" and supported by your API key configuration.") + +print() +print("=" * 70) From a60858348af85617ac96f87394050a833f64f33e Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:26:12 +0100 Subject: [PATCH 7/9] docs: update compression example with larger input context - Add substantial AI history document as context (~3000+ chars) - Demonstrate meaningful compression on large input - Show percentage of tokens saved - Explain that compression works on input tokens --- example/compression.py | 79 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/example/compression.py b/example/compression.py index c24cf39..a9d2ceb 100644 --- a/example/compression.py +++ b/example/compression.py @@ -1,9 +1,12 @@ """Example: Token compression with Edgee Gateway SDK This example demonstrates how to: -1. Enable compression for a request +1. Enable compression for a request with a large input context 2. Set a custom compression rate 3. Access compression metrics from the response + +Note: Compression works on INPUT tokens, so this example includes a large +context document to demonstrate meaningful compression savings. """ import os @@ -17,18 +20,73 @@ # Initialize the client edgee = Edgee(os.environ.get("EDGEE_API_KEY")) +# Large context document to demonstrate input compression +LARGE_CONTEXT = """ +The History and Impact of Artificial Intelligence + +Artificial intelligence (AI) has evolved from a theoretical concept to a +transformative technology that influences nearly every aspect of modern life. +The field began in earnest in the 1950s when pioneers like Alan Turing and +John McCarthy laid the groundwork for machine intelligence. + +Early developments focused on symbolic reasoning and expert systems. These +rule-based approaches dominated the field through the 1970s and 1980s, with +systems like MYCIN demonstrating practical applications in medical diagnosis. +However, these early systems were limited by their inability to learn from data +and adapt to new situations. + +The resurgence of neural networks in the 1980s and 1990s, particularly with +backpropagation algorithms, opened new possibilities. Yet it wasn't until the +2010s, with the advent of deep learning and the availability of massive datasets +and computational power, that AI truly began to revolutionize industries. + +Modern AI applications span numerous domains: +- Natural language processing enables machines to understand and generate human language +- Computer vision allows machines to interpret visual information from the world +- Robotics combines AI with mechanical systems for autonomous operation +- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment +- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment +- Transportation is being transformed by autonomous vehicles and traffic optimization + +The development of large language models like GPT, BERT, and others has +particularly accelerated progress in natural language understanding and generation. +These models, trained on vast amounts of text data, can perform a wide range of +language tasks with remarkable proficiency. + +Despite remarkable progress, significant challenges remain. Issues of bias, +interpretability, safety, and ethical considerations continue to be areas of +active research and debate. The AI community is working to ensure that these +powerful technologies are developed and deployed responsibly, with consideration +for their societal impact. + +Looking forward, AI is expected to continue advancing rapidly, with potential +breakthroughs in areas like artificial general intelligence, quantum machine +learning, and brain-computer interfaces. The integration of AI into daily life +will likely deepen, raising important questions about human-AI collaboration, +workforce transformation, and the future of human cognition itself. +""" + print("=" * 70) print("Edgee Token Compression Example") print("=" * 70) print() -# Example 1: Request with compression enabled -print("Example 1: Request with compression enabled") +# Example: Request with compression enabled and large input +print("Example: Large context with compression enabled") print("-" * 70) +print(f"Input context length: {len(LARGE_CONTEXT)} characters") +print() + response = edgee.send( model="gpt-4o", input={ - "messages": [{"role": "user", "content": "Explain quantum computing in simple terms."}], + "messages": [ + {"role": "system", "content": LARGE_CONTEXT}, + { + "role": "user", + "content": "Based on the context above, summarize the key milestones in AI development in 3 bullet points.", + }, + ], "enable_compression": True, "compression_rate": 0.5, }, @@ -51,7 +109,18 @@ print(f" Input tokens: {response.compression.input_tokens}") print(f" Saved tokens: {response.compression.saved_tokens}") print(f" Compression rate: {response.compression.rate:.2%}") - print(f" Token savings: {response.compression.saved_tokens} tokens saved!") + savings_pct = ( + (response.compression.saved_tokens / response.compression.input_tokens * 100) + if response.compression.input_tokens > 0 + else 0 + ) + print(f" Savings: {savings_pct:.1f}% of input tokens saved!") + print() + print(f" 💡 Without compression, this request would have used") + print(f" {response.compression.input_tokens} input tokens.") + print( + f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!" + ) else: print("No compression data available in response.") print("Note: Compression data is only returned when compression is enabled") From 0cdb016572697f121e52be498391d1431c68a503 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:35:03 +0100 Subject: [PATCH 8/9] fix: move large context to user message in compression example - Only USER messages are compressed, not system messages - Update example to put context in user message - Add clarifying comment about compression behavior --- example/compression.py | 63 +++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/example/compression.py b/example/compression.py index a9d2ceb..814be70 100644 --- a/example/compression.py +++ b/example/compression.py @@ -5,8 +5,9 @@ 2. Set a custom compression rate 3. Access compression metrics from the response -Note: Compression works on INPUT tokens, so this example includes a large -context document to demonstrate meaningful compression savings. +IMPORTANT: Only USER messages are compressed. System messages are not compressed. +This example includes a large context in the user message to demonstrate meaningful +compression savings. """ import os @@ -24,20 +25,20 @@ LARGE_CONTEXT = """ The History and Impact of Artificial Intelligence -Artificial intelligence (AI) has evolved from a theoretical concept to a -transformative technology that influences nearly every aspect of modern life. -The field began in earnest in the 1950s when pioneers like Alan Turing and +Artificial intelligence (AI) has evolved from a theoretical concept to a +transformative technology that influences nearly every aspect of modern life. +The field began in earnest in the 1950s when pioneers like Alan Turing and John McCarthy laid the groundwork for machine intelligence. -Early developments focused on symbolic reasoning and expert systems. These -rule-based approaches dominated the field through the 1970s and 1980s, with -systems like MYCIN demonstrating practical applications in medical diagnosis. -However, these early systems were limited by their inability to learn from data +Early developments focused on symbolic reasoning and expert systems. These +rule-based approaches dominated the field through the 1970s and 1980s, with +systems like MYCIN demonstrating practical applications in medical diagnosis. +However, these early systems were limited by their inability to learn from data and adapt to new situations. -The resurgence of neural networks in the 1980s and 1990s, particularly with -backpropagation algorithms, opened new possibilities. Yet it wasn't until the -2010s, with the advent of deep learning and the availability of massive datasets +The resurgence of neural networks in the 1980s and 1990s, particularly with +backpropagation algorithms, opened new possibilities. Yet it wasn't until the +2010s, with the advent of deep learning and the availability of massive datasets and computational power, that AI truly began to revolutionize industries. Modern AI applications span numerous domains: @@ -48,21 +49,21 @@ - Finance leverages AI for fraud detection, algorithmic trading, and risk assessment - Transportation is being transformed by autonomous vehicles and traffic optimization -The development of large language models like GPT, BERT, and others has -particularly accelerated progress in natural language understanding and generation. -These models, trained on vast amounts of text data, can perform a wide range of +The development of large language models like GPT, BERT, and others has +particularly accelerated progress in natural language understanding and generation. +These models, trained on vast amounts of text data, can perform a wide range of language tasks with remarkable proficiency. -Despite remarkable progress, significant challenges remain. Issues of bias, -interpretability, safety, and ethical considerations continue to be areas of -active research and debate. The AI community is working to ensure that these -powerful technologies are developed and deployed responsibly, with consideration +Despite remarkable progress, significant challenges remain. Issues of bias, +interpretability, safety, and ethical considerations continue to be areas of +active research and debate. The AI community is working to ensure that these +powerful technologies are developed and deployed responsibly, with consideration for their societal impact. -Looking forward, AI is expected to continue advancing rapidly, with potential -breakthroughs in areas like artificial general intelligence, quantum machine -learning, and brain-computer interfaces. The integration of AI into daily life -will likely deepen, raising important questions about human-AI collaboration, +Looking forward, AI is expected to continue advancing rapidly, with potential +breakthroughs in areas like artificial general intelligence, quantum machine +learning, and brain-computer interfaces. The integration of AI into daily life +will likely deepen, raising important questions about human-AI collaboration, workforce transformation, and the future of human cognition itself. """ @@ -72,20 +73,24 @@ print() # Example: Request with compression enabled and large input -print("Example: Large context with compression enabled") +print("Example: Large user message with compression enabled") print("-" * 70) print(f"Input context length: {len(LARGE_CONTEXT)} characters") print() +# NOTE: Only USER messages are compressed +# Put the large context in the user message to demonstrate compression +user_message = f"""Here is some context about AI: + +{LARGE_CONTEXT} + +Based on this context, summarize the key milestones in AI development in 3 bullet points.""" + response = edgee.send( model="gpt-4o", input={ "messages": [ - {"role": "system", "content": LARGE_CONTEXT}, - { - "role": "user", - "content": "Based on the context above, summarize the key milestones in AI development in 3 bullet points.", - }, + {"role": "user", "content": user_message}, ], "enable_compression": True, "compression_rate": 0.5, From f4e9f2b8fcf16912809d073a14f161a7c9e48667 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:37:48 +0100 Subject: [PATCH 9/9] ruff --- example/compression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/compression.py b/example/compression.py index 814be70..d446279 100644 --- a/example/compression.py +++ b/example/compression.py @@ -121,7 +121,7 @@ ) print(f" Savings: {savings_pct:.1f}% of input tokens saved!") print() - print(f" 💡 Without compression, this request would have used") + print(" 💡 Without compression, this request would have used") print(f" {response.compression.input_tokens} input tokens.") print( f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"