Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ response = edgee.send(
print(response.text) # Text content
print(response.finish_reason) # Finish reason
print(response.tool_calls) # Tool calls (if any)

# Access usage and compression info
if response.usage:
print(f"Tokens used: {response.usage.total_tokens}")

if response.compression:
print(f"Input tokens: {response.compression.input_tokens}")
print(f"Saved tokens: {response.compression.saved_tokens}")
print(f"Compression rate: {response.compression.rate}")
```

## Stream Method
Expand All @@ -64,6 +73,7 @@ for chunk in edgee.stream("gpt-4o", "Tell me a story"):
- ✅ **Streaming** - Real-time response streaming with generators
- ✅ **Tool calling** - Full support for function calling
- ✅ **Flexible input** - Accept strings, dicts, or InputObject
- ✅ **Compression info** - Access token compression metrics in responses
- ✅ **Zero dependencies** - Uses only Python standard library

## Documentation
Expand Down
34 changes: 33 additions & 1 deletion edgee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class InputObject:
tools: list[dict] | None = None
tool_choice: str | dict | None = None
tags: list[str] | None = None
enable_compression: bool | None = (
None # Enable token compression (gateway-internal, not sent to providers)
)
compression_rate: float | None = (
None # Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
)


@dataclass
Expand All @@ -62,10 +68,18 @@ class Usage:
total_tokens: int


@dataclass
class Compression:
    """Token-compression metrics for a single request.

    Built from the optional ``compression`` object of the gateway's JSON
    response; when that key is absent, ``SendResponse.compression`` stays
    ``None``.
    """

    input_tokens: int  # Size of the original (pre-compression) input, in tokens
    saved_tokens: int  # Number of input tokens removed by compression
    rate: float  # Gateway-reported compression rate — presumably 0.0-1.0, mirroring the `compression_rate` request option; confirm against gateway docs


@dataclass
class SendResponse:
choices: list[Choice]
usage: Usage | None = None
compression: Compression | None = None

@property
def text(self) -> str | None:
Expand Down Expand Up @@ -190,16 +204,22 @@ def send(
tools = None
tool_choice = None
tags = None
enable_compression = None
compression_rate = None
elif isinstance(input, InputObject):
messages = input.messages
tools = input.tools
tool_choice = input.tool_choice
tags = input.tags
enable_compression = input.enable_compression
compression_rate = input.compression_rate
else:
messages = input.get("messages", [])
tools = input.get("tools")
tool_choice = input.get("tool_choice")
tags = input.get("tags")
enable_compression = input.get("enable_compression")
compression_rate = input.get("compression_rate")

body: dict = {"model": model, "messages": messages}
if stream:
Expand All @@ -210,6 +230,10 @@ def send(
body["tool_choice"] = tool_choice
if tags:
body["tags"] = tags
if enable_compression is not None:
body["enable_compression"] = enable_compression
if compression_rate is not None:
body["compression_rate"] = compression_rate

request = Request(
f"{self.base_url}{API_ENDPOINT}",
Expand Down Expand Up @@ -252,7 +276,15 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
total_tokens=data["usage"]["total_tokens"],
)

return SendResponse(choices=choices, usage=usage)
compression = None
if "compression" in data:
compression = Compression(
input_tokens=data["compression"]["input_tokens"],
saved_tokens=data["compression"]["saved_tokens"],
rate=data["compression"]["rate"],
)

return SendResponse(choices=choices, usage=usage, compression=compression)

def _handle_streaming_response(self, request: Request):
"""Handle streaming response, yielding StreamChunk objects."""
Expand Down
135 changes: 135 additions & 0 deletions example/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Example: Token compression with Edgee Gateway SDK

This example demonstrates how to:
1. Enable compression for a request with a large input context
2. Set a custom compression rate
3. Access compression metrics from the response

IMPORTANT: Only USER messages are compressed. System messages are not compressed.
This example includes a large context in the user message to demonstrate meaningful
compression savings.
"""

import os
import sys

# Add parent directory to path for local testing
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from edgee import Edgee

# Initialize the client
edgee = Edgee(os.environ.get("EDGEE_API_KEY"))

# Large context document to demonstrate input compression
LARGE_CONTEXT = """
The History and Impact of Artificial Intelligence

Artificial intelligence (AI) has evolved from a theoretical concept to a
transformative technology that influences nearly every aspect of modern life.
The field began in earnest in the 1950s when pioneers like Alan Turing and
John McCarthy laid the groundwork for machine intelligence.

Early developments focused on symbolic reasoning and expert systems. These
rule-based approaches dominated the field through the 1970s and 1980s, with
systems like MYCIN demonstrating practical applications in medical diagnosis.
However, these early systems were limited by their inability to learn from data
and adapt to new situations.

The resurgence of neural networks in the 1980s and 1990s, particularly with
backpropagation algorithms, opened new possibilities. Yet it wasn't until the
2010s, with the advent of deep learning and the availability of massive datasets
and computational power, that AI truly began to revolutionize industries.

Modern AI applications span numerous domains:
- Natural language processing enables machines to understand and generate human language
- Computer vision allows machines to interpret visual information from the world
- Robotics combines AI with mechanical systems for autonomous operation
- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
- Transportation is being transformed by autonomous vehicles and traffic optimization

The development of large language models like GPT, BERT, and others has
particularly accelerated progress in natural language understanding and generation.
These models, trained on vast amounts of text data, can perform a wide range of
language tasks with remarkable proficiency.

Despite remarkable progress, significant challenges remain. Issues of bias,
interpretability, safety, and ethical considerations continue to be areas of
active research and debate. The AI community is working to ensure that these
powerful technologies are developed and deployed responsibly, with consideration
for their societal impact.

Looking forward, AI is expected to continue advancing rapidly, with potential
breakthroughs in areas like artificial general intelligence, quantum machine
learning, and brain-computer interfaces. The integration of AI into daily life
will likely deepen, raising important questions about human-AI collaboration,
workforce transformation, and the future of human cognition itself.
"""

print("=" * 70)
print("Edgee Token Compression Example")
print("=" * 70)
print()

# Example: Request with compression enabled and large input
print("Example: Large user message with compression enabled")
print("-" * 70)
print(f"Input context length: {len(LARGE_CONTEXT)} characters")
print()

# NOTE: Only USER messages are compressed
# Put the large context in the user message to demonstrate compression
user_message = f"""Here is some context about AI:

{LARGE_CONTEXT}

Based on this context, summarize the key milestones in AI development in 3 bullet points."""

response = edgee.send(
model="gpt-4o",
input={
"messages": [
{"role": "user", "content": user_message},
],
"enable_compression": True,
"compression_rate": 0.5,
},
)

print(f"Response: {response.text}")
print()

# Display usage information
if response.usage:
print("Token Usage:")
print(f" Prompt tokens: {response.usage.prompt_tokens}")
print(f" Completion tokens: {response.usage.completion_tokens}")
print(f" Total tokens: {response.usage.total_tokens}")
print()

# Display compression information
if response.compression:
print("Compression Metrics:")
print(f" Input tokens: {response.compression.input_tokens}")
print(f" Saved tokens: {response.compression.saved_tokens}")
print(f" Compression rate: {response.compression.rate:.2%}")
savings_pct = (
(response.compression.saved_tokens / response.compression.input_tokens * 100)
if response.compression.input_tokens > 0
else 0
)
print(f" Savings: {savings_pct:.1f}% of input tokens saved!")
print()
print(" 💡 Without compression, this request would have used")
print(f" {response.compression.input_tokens} input tokens.")
print(
f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
)
else:
print("No compression data available in response.")
print("Note: Compression data is only returned when compression is enabled")
print(" and supported by your API key configuration.")

print()
print("=" * 70)
47 changes: 47 additions & 0 deletions tests/test_edgee.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,50 @@ def test_config_base_url_overrides_env(self, mock_urlopen):

call_args = mock_urlopen.call_args[0][0]
assert call_args.full_url == f"{config_base_url}/v1/chat/completions"

@patch("edgee.urlopen")
def test_send_with_compression_response(self, mock_urlopen):
"""Should handle response with compression field"""
mock_response_data = {
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "Response"},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
"compression": {
"input_tokens": 100,
"saved_tokens": 42,
"rate": 0.6102003642987249,
},
}
mock_urlopen.return_value = self._mock_response(mock_response_data)

client = Edgee("test-api-key")
result = client.send(model="gpt-4", input="Test")

assert result.compression is not None
assert result.compression.input_tokens == 100
assert result.compression.saved_tokens == 42
assert result.compression.rate == 0.6102003642987249

@patch("edgee.urlopen")
def test_send_without_compression_response(self, mock_urlopen):
"""Should handle response without compression field"""
mock_response_data = {
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "Response"},
"finish_reason": "stop",
}
],
}
mock_urlopen.return_value = self._mock_response(mock_response_data)

client = Edgee("test-api-key")
result = client.send(model="gpt-4", input="Test")

assert result.compression is None
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading