diff --git a/README.md b/README.md
index 86fcc3c..b179649 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,15 @@ response = edgee.send(
 print(response.text) # Text content
 print(response.finish_reason) # Finish reason
 print(response.tool_calls) # Tool calls (if any)
+
+# Access usage and compression info
+if response.usage:
+    print(f"Tokens used: {response.usage.total_tokens}")
+
+if response.compression:
+    print(f"Input tokens: {response.compression.input_tokens}")
+    print(f"Saved tokens: {response.compression.saved_tokens}")
+    print(f"Compression rate: {response.compression.rate}")
 ```
 
 ## Stream Method
@@ -64,6 +73,7 @@ for chunk in edgee.stream("gpt-4o", "Tell me a story"):
 - ✅ **Streaming** - Real-time response streaming with generators
 - ✅ **Tool calling** - Full support for function calling
 - ✅ **Flexible input** - Accept strings, dicts, or InputObject
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero dependencies** - Uses only Python standard library
 
 ## Documentation
diff --git a/edgee/__init__.py b/edgee/__init__.py
index 65f5dc2..b8d5520 100644
--- a/edgee/__init__.py
+++ b/edgee/__init__.py
@@ -46,6 +46,12 @@ class InputObject:
     tools: list[dict] | None = None
     tool_choice: str | dict | None = None
     tags: list[str] | None = None
+    enable_compression: bool | None = (
+        None  # Enable token compression (gateway-internal, not sent to providers)
+    )
+    compression_rate: float | None = (
+        None  # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
+    )
 
 
 @dataclass
@@ -62,10 +68,18 @@ class Usage:
     total_tokens: int
 
 
+@dataclass
+class Compression:
+    input_tokens: int
+    saved_tokens: int
+    rate: float
+
+
 @dataclass
 class SendResponse:
     choices: list[Choice]
     usage: Usage | None = None
+    compression: Compression | None = None
 
     @property
     def text(self) -> str | None:
@@ -190,16 +204,22 @@ def send(
             tools = None
             tool_choice = None
             tags = None
+            enable_compression = None
+            compression_rate = None
         elif isinstance(input, InputObject):
             messages = input.messages
             tools = input.tools
             tool_choice = input.tool_choice
             tags = input.tags
+            enable_compression = input.enable_compression
+            compression_rate = input.compression_rate
         else:
             messages = input.get("messages", [])
             tools = input.get("tools")
             tool_choice = input.get("tool_choice")
             tags = input.get("tags")
+            enable_compression = input.get("enable_compression")
+            compression_rate = input.get("compression_rate")
 
         body: dict = {"model": model, "messages": messages}
         if stream:
@@ -210,6 +230,10 @@
             body["tool_choice"] = tool_choice
         if tags:
             body["tags"] = tags
+        if enable_compression is not None:
+            body["enable_compression"] = enable_compression
+        if compression_rate is not None:
+            body["compression_rate"] = compression_rate
 
         request = Request(
             f"{self.base_url}{API_ENDPOINT}",
@@ -252,7 +276,15 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
                 total_tokens=data["usage"]["total_tokens"],
             )
 
-        return SendResponse(choices=choices, usage=usage)
+        compression = None
+        if "compression" in data:
+            compression = Compression(
+                input_tokens=data["compression"]["input_tokens"],
+                saved_tokens=data["compression"]["saved_tokens"],
+                rate=data["compression"]["rate"],
+            )
+
+        return SendResponse(choices=choices, usage=usage, compression=compression)
 
     def _handle_streaming_response(self, request: Request):
         """Handle streaming response, yielding StreamChunk objects."""
diff --git a/example/compression.py b/example/compression.py
new file mode 100644
index 0000000..d446279
--- /dev/null
+++ b/example/compression.py
@@ -0,0 +1,135 @@
+"""Example: Token compression with Edgee Gateway SDK
+
+This example demonstrates how to:
+1. Enable compression for a request with a large input context
+2. Set a custom compression rate
+3. Access compression metrics from the response
+
+IMPORTANT: Only USER messages are compressed. System messages are not compressed.
+This example includes a large context in the user message to demonstrate meaningful
+compression savings.
+"""
+
+import os
+import sys
+
+# Add parent directory to path for local testing
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from edgee import Edgee
+
+# Initialize the client
+edgee = Edgee(os.environ.get("EDGEE_API_KEY"))
+
+# Large context document to demonstrate input compression
+LARGE_CONTEXT = """
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a
+transformative technology that influences nearly every aspect of modern life.
+The field began in earnest in the 1950s when pioneers like Alan Turing and
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These
+rule-based approaches dominated the field through the 1970s and 1980s, with
+systems like MYCIN demonstrating practical applications in medical diagnosis.
+However, these early systems were limited by their inability to learn from data
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the
+2010s, with the advent of deep learning and the availability of massive datasets
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has
+particularly accelerated progress in natural language understanding and generation.
+These models, trained on vast amounts of text data, can perform a wide range of
+language tasks with remarkable proficiency.
+
+Despite remarkable progress, significant challenges remain. Issues of bias,
+interpretability, safety, and ethical considerations continue to be areas of
+active research and debate. The AI community is working to ensure that these
+powerful technologies are developed and deployed responsibly, with consideration
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential
+breakthroughs in areas like artificial general intelligence, quantum machine
+learning, and brain-computer interfaces. The integration of AI into daily life
+will likely deepen, raising important questions about human-AI collaboration,
+workforce transformation, and the future of human cognition itself.
+"""
+
+print("=" * 70)
+print("Edgee Token Compression Example")
+print("=" * 70)
+print()
+
+# Example: Request with compression enabled and large input
+print("Example: Large user message with compression enabled")
+print("-" * 70)
+print(f"Input context length: {len(LARGE_CONTEXT)} characters")
+print()
+
+# NOTE: Only USER messages are compressed
+# Put the large context in the user message to demonstrate compression
+user_message = f"""Here is some context about AI:
+
+{LARGE_CONTEXT}
+
+Based on this context, summarize the key milestones in AI development in 3 bullet points."""
+
+response = edgee.send(
+    model="gpt-4o",
+    input={
+        "messages": [
+            {"role": "user", "content": user_message},
+        ],
+        "enable_compression": True,
+        "compression_rate": 0.5,
+    },
+)
+
+print(f"Response: {response.text}")
+print()
+
+# Display usage information
+if response.usage:
+    print("Token Usage:")
+    print(f"  Prompt tokens: {response.usage.prompt_tokens}")
+    print(f"  Completion tokens: {response.usage.completion_tokens}")
+    print(f"  Total tokens: {response.usage.total_tokens}")
+    print()
+
+# Display compression information
+if response.compression:
+    print("Compression Metrics:")
+    print(f"  Input tokens: {response.compression.input_tokens}")
+    print(f"  Saved tokens: {response.compression.saved_tokens}")
+    print(f"  Compression rate: {response.compression.rate:.2%}")
+    savings_pct = (
+        (response.compression.saved_tokens / response.compression.input_tokens * 100)
+        if response.compression.input_tokens > 0
+        else 0
+    )
+    print(f"  Savings: {savings_pct:.1f}% of input tokens saved!")
+    print()
+    print("  💡 Without compression, this request would have used")
+    print(f"     {response.compression.input_tokens} input tokens.")
+    print(
+        f"     With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
+    )
+else:
+    print("No compression data available in response.")
+    print("Note: Compression data is only returned when compression is enabled")
+    print("      and supported by your API key configuration.")
+
+print()
+print("=" * 70)
diff --git a/tests/test_edgee.py b/tests/test_edgee.py
index 048a6b7..520d714 100644
--- a/tests/test_edgee.py
+++ b/tests/test_edgee.py
@@ -306,3 +306,50 @@ def test_config_base_url_overrides_env(self, mock_urlopen):
 
         call_args = mock_urlopen.call_args[0][0]
         assert call_args.full_url == f"{config_base_url}/v1/chat/completions"
+
+    @patch("edgee.urlopen")
+    def test_send_with_compression_response(self, mock_urlopen):
+        """Should handle response with compression field"""
+        mock_response_data = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": "Response"},
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
+            "compression": {
+                "input_tokens": 100,
+                "saved_tokens": 42,
+                "rate": 0.6102003642987249,
+            },
+        }
+        mock_urlopen.return_value = self._mock_response(mock_response_data)
+
+        client = Edgee("test-api-key")
+        result = client.send(model="gpt-4", input="Test")
+
+        assert result.compression is not None
+        assert result.compression.input_tokens == 100
+        assert result.compression.saved_tokens == 42
+        assert result.compression.rate == 0.6102003642987249
+
+    @patch("edgee.urlopen")
+    def test_send_without_compression_response(self, mock_urlopen):
+        """Should handle response without compression field"""
+        mock_response_data = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": "Response"},
+                    "finish_reason": "stop",
+                }
+            ],
+        }
+        mock_urlopen.return_value = self._mock_response(mock_response_data)
+
+        client = Edgee("test-api-key")
+        result = client.send(model="gpt-4", input="Test")
+
+        assert result.compression is None
diff --git a/uv.lock b/uv.lock
index ccc443e..8842f95 100644
--- a/uv.lock
+++ b/uv.lock
@@ -13,7 +13,7 @@ wheels = [
 
 [[package]]
 name = "edgee"
-version = "0.1.1"
+version = "1.0.0"
 source = { editable = "." }
 
 [package.optional-dependencies]
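
Usage note (not part of the patch): the example and tests above exercise the new compression fields only through the dict input path. Below is a minimal sketch of the equivalent `InputObject` path, assuming `InputObject` declares a `messages` field ahead of the ones shown in this diff (`send()` reads `input.messages`); the API key lookup and model name are illustrative.

```python
import os

from edgee import Edgee, InputObject

client = Edgee(os.environ.get("EDGEE_API_KEY"))

# InputObject carries the same gateway-internal compression fields as the
# dict form; per the diff, send() forwards them in the request body as
# "enable_compression" and "compression_rate" whenever they are not None.
request = InputObject(
    messages=[{"role": "user", "content": "Summarize the history of AI."}],
    enable_compression=True,  # opt in to token compression
    compression_rate=0.5,  # requested rate, 0.0-1.0
)

response = client.send(model="gpt-4o", input=request)
print(response.text)

# Compression metrics are present only when the gateway applied compression.
if response.compression:
    print(f"Saved {response.compression.saved_tokens} of "
          f"{response.compression.input_tokens} input tokens")
```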