From a9d16bc5c770bbbace58f9fdb340a81e63ad23ed Mon Sep 17 00:00:00 2001
From: "U-DESKTOP-877VI7G\\kylex"
Date: Fri, 3 Apr 2026 03:13:19 -0700
Subject: [PATCH 1/2] Structured Output support

Added structured output support for chat. Tested all model providers +
streaming.

We now also include usage as part of the TextGenerationOutput. This
surfaced a bug in usage reporting for Gemini in streaming. Unit tests
are included as well.
---
 pyproject.toml                 |  2 +-
 src/opengradient/__init__.py   |  2 +
 src/opengradient/client/llm.py | 22 ++++++++++-
 src/opengradient/types.py      | 68 ++++++++++++++++++++++++++++++++++
 uv.lock                        |  4 +-
 5 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 17512b8a..4fd346c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "opengradient"
-version = "0.9.4"
+version = "0.9.5"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
 authors = [{name = "OpenGradient", email = "adam@vannalabs.ai"}]
 readme = "README.md"
diff --git a/src/opengradient/__init__.py b/src/opengradient/__init__.py
index 89d5ff08..a198203d 100644
--- a/src/opengradient/__init__.py
+++ b/src/opengradient/__init__.py
@@ -88,6 +88,7 @@ async def stream_example():
     InferenceResult,
     ModelOutput,
     ModelRepository,
+    ResponseFormat,
     SchedulerParams,
     TextGenerationOutput,
     TextGenerationStream,
@@ -105,6 +106,7 @@ async def stream_example():
     "SchedulerParams",
     "CandleType",
     "CandleOrder",
+    "ResponseFormat",
     "TextGenerationOutput",
     "TextGenerationStream",
     "x402SettlementMode",
diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py
index ed54fd99..326a4ba9 100644
--- a/src/opengradient/client/llm.py
+++ b/src/opengradient/client/llm.py
@@ -14,7 +14,7 @@ from x402.mechanisms.evm.exact.register import register_exact_evm_client
 from x402.mechanisms.evm.upto.register import register_upto_evm_client
 
-from ..types import TEE_LLM, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
+from ..types import TEE_LLM, ResponseFormat, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
 from .opg_token import Permit2ApprovalResult, ensure_opg_approval
 from .tee_connection import RegistryTEEConnection, StaticTEEConnection, TEEConnectionInterface
 from .tee_registry import TEERegistry
 
@@ -44,6 +44,7 @@ class _ChatParams:
     stop_sequence: Optional[List[str]]
     tools: Optional[List[Dict]]
     tool_choice: Optional[str]
+    response_format: Optional[ResponseFormat]
     x402_settlement_mode: x402SettlementMode
 
 
@@ -152,6 +153,8 @@ def _chat_payload(self, params: _ChatParams, messages: List[Dict], stream: bool
         if params.tools:
             payload["tools"] = params.tools
             payload["tool_choice"] = params.tool_choice or "auto"
+        if params.response_format:
+            payload["response_format"] = params.response_format.to_dict()
         return payload
 
     async def _call_with_tee_retry(
@@ -297,6 +300,7 @@ async def chat(
         temperature: float = 0.0,
         tools: Optional[List[Dict]] = None,
         tool_choice: Optional[str] = None,
+        response_format: Optional[ResponseFormat] = None,
         x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED,
         stream: bool = False,
     ) -> Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]:
@@ -311,6 +315,11 @@ async def chat(
             temperature (float): Temperature for LLM inference, between 0 and 1.
             tools (List[dict], optional): Set of tools for function calling.
             tool_choice (str, optional): Sets a specific tool to choose.
+            response_format (ResponseFormat, optional): Enforces a specific output format.
+                Use ``ResponseFormat(type="json_object")`` for any valid JSON (not supported
+                by Anthropic models). Use ``ResponseFormat(type="json_schema", json_schema={...})``
+                to enforce a strict schema (supported by all providers including Anthropic).
+                Defaults to None (plain text).
             x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments.
                 - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving).
                 - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient).
@@ -324,8 +333,17 @@ async def chat(
             - If stream=True: Async generator yielding StreamChunk objects
 
         Raises:
+            ValueError: If a ``response_format`` of type ``"json_object"`` is used with an Anthropic model.
             RuntimeError: If the inference fails.
         """
+        if response_format is not None and response_format.type == "json_object":
+            provider = model.split("/")[0]
+            if provider == "anthropic":
+                raise ValueError(
+                    "Anthropic models do not support response_format type 'json_object'. "
+                    "Use ResponseFormat(type='json_schema', json_schema={...}) with an explicit schema instead."
+                )
+
         params = _ChatParams(
             model=model.split("/")[1],
             max_tokens=max_tokens,
@@ -333,6 +351,7 @@ async def chat(
             stop_sequence=stop_sequence,
             tools=tools,
             tool_choice=tool_choice,
+            response_format=response_format,
             x402_settlement_mode=x402_settlement_mode,
         )
 
@@ -379,6 +398,7 @@ async def _request() -> TextGenerationOutput:
                 transaction_hash="external",
                 finish_reason=choices[0].get("finish_reason"),
                 chat_output=message,
+                usage=result.get("usage"),
                 tee_signature=result.get("tee_signature"),
                 tee_timestamp=result.get("tee_timestamp"),
                 **tee.metadata(),
diff --git a/src/opengradient/types.py b/src/opengradient/types.py
index a59293fa..035c67db 100644
--- a/src/opengradient/types.py
+++ b/src/opengradient/types.py
@@ -428,6 +428,9 @@ class TextGenerationOutput:
     completion_output: Optional[str] = None
     """Raw text returned by a completion request."""
 
+    usage: Optional[Dict] = None
+    """Token usage for the request. Contains ``prompt_tokens``, ``completion_tokens``, and ``total_tokens`` when reported by the server."""
+
     payment_hash: Optional[str] = None
     """Payment hash for the x402 transaction."""
 
@@ -526,6 +529,71 @@ class TEE_LLM(str, Enum):
     GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning"
 
 
+@dataclass
+class ResponseFormat:
+    """Controls the output format enforced by the TEE gateway.
+
+    Use ``type="json_object"`` to receive any valid JSON object (supported by
+    OpenAI, Gemini, and Grok). Use ``type="json_schema"`` with a ``json_schema``
+    definition to enforce a specific schema (supported by all providers,
+    including Anthropic).
+
+    Attributes:
+        type: One of ``"text"``, ``"json_object"``, or ``"json_schema"``.
+        json_schema: Schema definition (required when ``type="json_schema"``).
+            Must contain ``name`` (str) and ``schema`` (dict).
+            ``strict`` (bool) is optional.
+
+    Raises:
+        ValueError: If ``type`` is not a recognised value, or if
+            ``type="json_schema"`` is used without providing ``json_schema``.
+
+    Examples::
+
+        # Any valid JSON object — OpenAI, Gemini, Grok only
+        ResponseFormat(type="json_object")
+
+        # Strict schema — all providers including Anthropic
+        ResponseFormat(
+            type="json_schema",
+            json_schema={
+                "name": "person",
+                "strict": True,
+                "schema": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                    "required": ["name", "age"],
+                    "additionalProperties": False,
+                },
+            },
+        )
+    """
+
+    type: str
+    json_schema: Optional[Dict] = None
+
+    def __post_init__(self) -> None:
+        valid_types = ("text", "json_object", "json_schema")
+        if self.type not in valid_types:
+            raise ValueError(
+                f"ResponseFormat.type must be one of {valid_types}, got '{self.type}'"
+            )
+        if self.type == "json_schema" and not self.json_schema:
+            raise ValueError(
+                "ResponseFormat.json_schema is required when type='json_schema'"
+            )
+
+    def to_dict(self) -> Dict:
+        """Serialise to a JSON-compatible dict for the TEE gateway request payload."""
+        d: Dict = {"type": self.type}
+        if self.json_schema is not None:
+            d["json_schema"] = self.json_schema
+        return d
+
+
 @dataclass
 class SchedulerParams:
     frequency: int
diff --git a/uv.lock b/uv.lock
index 6d0327a6..3ab6b1b6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.11"
 resolution-markers = [
     "python_full_version >= '3.14'",
@@ -1867,7 +1867,7 @@ wheels = [
 
 [[package]]
 name = "opengradient"
-version = "0.9.3"
+version = "0.9.5"
 source = { editable = "." }
 dependencies = [
     { name = "click" },

From 35acd982d8fc84caae6ae5db507ad7824c30d1ab Mon Sep 17 00:00:00 2001
From: Kyle Qian
Date: Fri, 3 Apr 2026 03:17:19 -0700
Subject: [PATCH 2/2] Update pyproject.toml and uv.lock so both match the
 newest version

---
 pyproject.toml | 2 +-
 uv.lock | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 34754b4e..8cee4527 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "opengradient"
-version = "0.9.6"
+version = "0.9.7"
 description = "Python SDK for OpenGradient decentralized model management & inference services"
 authors = [{name = "OpenGradient", email = "adam@vannalabs.ai"}]
 readme = "README.md"
diff --git a/uv.lock b/uv.lock
index 3ab6b1b6..dc010f2d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1867,7 +1867,7 @@ wheels = [
 
 [[package]]
 name = "opengradient"
-version = "0.9.5"
+version = "0.9.7"
 source = { editable = "." }
 dependencies = [
     { name = "click" },
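
Usage sketch (reviewer illustration, assumptions flagged): the snippet below
exercises the new ``response_format`` parameter and the ``usage`` field end to
end. ``ResponseFormat``, the model string, ``chat()``'s keyword arguments, and
the ``chat_output``/``usage`` fields come from the diffs above; the client
construction (``og.Client()``) and the ``client.llm`` accessor are assumptions
about the SDK surface, not something these patches define.

    import asyncio
    import json

    import opengradient as og


    async def main() -> None:
        # Hypothetical client setup -- constructor arguments (keys, network,
        # etc.) are not shown in this diff; see the SDK README.
        client = og.Client()

        result = await client.llm.chat(
            # Model string taken from the TEE_LLM enum in types.py.
            model="x-ai/grok-4-1-fast-non-reasoning",
            messages=[{"role": "user", "content": "Name a city and its population."}],
            max_tokens=512,
            # type="json_schema" is accepted by every provider, including
            # Anthropic; type="json_object" on an anthropic/* model would
            # raise ValueError client-side.
            response_format=og.ResponseFormat(
                type="json_schema",
                json_schema={
                    "name": "city_info",
                    "strict": True,
                    "schema": {
                        "type": "object",
                        "properties": {
                            "city": {"type": "string"},
                            "population": {"type": "integer"},
                        },
                        "required": ["city", "population"],
                        "additionalProperties": False,
                    },
                },
            ),
        )

        # chat_output is the raw message dict; assuming the JSON text lands in
        # its "content" field when a schema is enforced.
        data = json.loads(result.chat_output["content"])
        print(data["city"], data["population"])

        # usage is new in this patch: prompt_tokens / completion_tokens /
        # total_tokens, when the server reports them.
        print(result.usage)


    asyncio.run(main())

Worth noting: the ``json_object`` guard runs client-side in ``chat()``, so an
unsupported request fails fast before the TEE request (and its x402 payment)
is ever made.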