From 0b6cb154337eeb8641a853bf6ff0b3b884c1c05a Mon Sep 17 00:00:00 2001 From: API Engineering Date: Wed, 25 Mar 2026 17:05:56 +0000 Subject: [PATCH] [bot] Updated client based on openapi-1e369f0/clientgen --- DO_OPENAPI_COMMIT_SHA.txt | 2 +- src/pydo/_client.py | 12 + src/pydo/aio/_client.py | 12 + src/pydo/aio/operations/__init__.py | 4 + src/pydo/aio/operations/_operations.py | 2661 ++++++++++++++++++++++ src/pydo/operations/__init__.py | 4 + src/pydo/operations/_operations.py | 2782 ++++++++++++++++++++++++ 7 files changed, 5476 insertions(+), 1 deletion(-) diff --git a/DO_OPENAPI_COMMIT_SHA.txt b/DO_OPENAPI_COMMIT_SHA.txt index 11ac4a0..76cae07 100644 --- a/DO_OPENAPI_COMMIT_SHA.txt +++ b/DO_OPENAPI_COMMIT_SHA.txt @@ -1 +1 @@ -ebfa95a +1e369f0 diff --git a/src/pydo/_client.py b/src/pydo/_client.py index 0eee1c6..c02a3cc 100644 --- a/src/pydo/_client.py +++ b/src/pydo/_client.py @@ -17,6 +17,7 @@ AccountOperations, ActionsOperations, AddonsOperations, + AgentInferenceOperations, AppsOperations, AutoscalepoolsOperations, BalanceOperations, @@ -36,6 +37,7 @@ GenaiOperations, ImageActionsOperations, ImagesOperations, + InferenceOperations, InvoicesOperations, KubernetesOperations, LoadBalancersOperations, @@ -674,6 +676,10 @@ class GeneratedClient: # pylint: disable=client-accepts-api-version-keyword,too :vartype uptime: pydo.operations.UptimeOperations :ivar genai: GenaiOperations operations :vartype genai: pydo.operations.GenaiOperations + :ivar inference: InferenceOperations operations + :vartype inference: pydo.operations.InferenceOperations + :ivar agent_inference: AgentInferenceOperations operations + :vartype agent_inference: pydo.operations.AgentInferenceOperations :param credential: Credential needed for the client to connect to Azure. Required. :type credential: ~azure.core.credentials.TokenCredential :keyword endpoint: Service URL. Default value is "https://api.digitalocean.com". 
@@ -866,6 +872,12 @@ def __init__( self.genai = GenaiOperations( self._client, self._config, self._serialize, self._deserialize ) + self.inference = InferenceOperations( + self._client, self._config, self._serialize, self._deserialize + ) + self.agent_inference = AgentInferenceOperations( + self._client, self._config, self._serialize, self._deserialize + ) def send_request( self, request: HttpRequest, *, stream: bool = False, **kwargs: Any diff --git a/src/pydo/aio/_client.py b/src/pydo/aio/_client.py index 64d498b..570fb5d 100644 --- a/src/pydo/aio/_client.py +++ b/src/pydo/aio/_client.py @@ -17,6 +17,7 @@ AccountOperations, ActionsOperations, AddonsOperations, + AgentInferenceOperations, AppsOperations, AutoscalepoolsOperations, BalanceOperations, @@ -36,6 +37,7 @@ GenaiOperations, ImageActionsOperations, ImagesOperations, + InferenceOperations, InvoicesOperations, KubernetesOperations, LoadBalancersOperations, @@ -674,6 +676,10 @@ class GeneratedClient: # pylint: disable=client-accepts-api-version-keyword,too :vartype uptime: pydo.aio.operations.UptimeOperations :ivar genai: GenaiOperations operations :vartype genai: pydo.aio.operations.GenaiOperations + :ivar inference: InferenceOperations operations + :vartype inference: pydo.aio.operations.InferenceOperations + :ivar agent_inference: AgentInferenceOperations operations + :vartype agent_inference: pydo.aio.operations.AgentInferenceOperations :param credential: Credential needed for the client to connect to Azure. Required. :type credential: ~azure.core.credentials_async.AsyncTokenCredential :keyword endpoint: Service URL. Default value is "https://api.digitalocean.com". 
@@ -866,6 +872,12 @@ def __init__( self.genai = GenaiOperations( self._client, self._config, self._serialize, self._deserialize ) + self.inference = InferenceOperations( + self._client, self._config, self._serialize, self._deserialize + ) + self.agent_inference = AgentInferenceOperations( + self._client, self._config, self._serialize, self._deserialize + ) def send_request( self, request: HttpRequest, *, stream: bool = False, **kwargs: Any diff --git a/src/pydo/aio/operations/__init__.py b/src/pydo/aio/operations/__init__.py index 0c8c048..4286825 100644 --- a/src/pydo/aio/operations/__init__.py +++ b/src/pydo/aio/operations/__init__.py @@ -54,6 +54,8 @@ from ._operations import VpcnatgatewaysOperations from ._operations import UptimeOperations from ._operations import GenaiOperations +from ._operations import InferenceOperations +from ._operations import AgentInferenceOperations from ._patch import __all__ as _patch_all from ._patch import * # pylint: disable=unused-wildcard-import @@ -110,6 +112,8 @@ "VpcnatgatewaysOperations", "UptimeOperations", "GenaiOperations", + "InferenceOperations", + "AgentInferenceOperations", ] __all__.extend([p for p in _patch_all if p not in __all__]) _patch_sdk() diff --git a/src/pydo/aio/operations/_operations.py b/src/pydo/aio/operations/_operations.py index 00b59bc..fee4c4f 100644 --- a/src/pydo/aio/operations/_operations.py +++ b/src/pydo/aio/operations/_operations.py @@ -47,6 +47,7 @@ build_addons_list_request, build_addons_patch_plan_request, build_addons_patch_request, + build_agent_inference_create_chat_completion_request, build_apps_assign_alert_destinations_request, build_apps_cancel_deployment_request, build_apps_cancel_event_request, @@ -346,6 +347,11 @@ build_images_get_request, build_images_list_request, build_images_update_request, + build_inference_create_async_invoke_request, + build_inference_create_chat_completion_request, + build_inference_create_image_request, + build_inference_create_response_request, + 
build_inference_list_models_request, build_invoices_get_by_uuid_request, build_invoices_get_csv_by_uuid_request, build_invoices_get_pdf_by_uuid_request, @@ -252674,3 +252680,2658 @@ async def list_evaluation_test_cases_by_workspace( return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore return cast(JSON, deserialized) # type: ignore + + +class InferenceOperations: + """ + .. warning:: + **DO NOT** instantiate this class directly. + + Instead, you should access the following operations through + :class:`~pydo.aio.GeneratedClient`'s + :attr:`inference` attribute. + """ + + def __init__(self, *args, **kwargs) -> None: + input_args = list(args) + self._client = input_args.pop(0) if input_args else kwargs.pop("client") + self._config = input_args.pop(0) if input_args else kwargs.pop("config") + self._serialize = input_args.pop(0) if input_args else kwargs.pop("serializer") + self._deserialize = ( + input_args.pop(0) if input_args else kwargs.pop("deserializer") + ) + + @overload + async def create_chat_completion( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a model response for the given chat conversation. + + Creates a model response for the given chat conversation. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "messages": [ + { + "role": "str", # The role of the message author. Required. + Known values are: "system", "developer", "user", "assistant", and "tool". + "content": "str", # Optional. The contents of the message. + "reasoning_content": "str", # Optional. 
The reasoning + content generated by the model (assistant messages only). + "refusal": "str", # Optional. The refusal message generated + by the model (assistant messages only). + "tool_call_id": "str", # Optional. Tool call that this + message is responding to (tool messages only). + "tool_calls": [ + { + "function": { + "arguments": "str", # The arguments + to call the function with, as generated by the model in JSON + format. Required. + "name": "str" # The name of the + function to call. Required. + }, + "id": "str", # The ID of the tool call. + Required. + "type": "str" # The type of the tool. + Currently, only function is supported. Required. "function" + } + ] + } + ], + "model": "str", # Model ID used to generate the response. Required. + "frequency_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on their existing frequency in + the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + "logit_bias": { + "str": 0 # Optional. Modify the likelihood of specified tokens + appearing in the completion. Accepts a JSON object that maps tokens + (specified by their token ID in the tokenizer) to an associated bias value + from -100 to 100. Mathematically, the bias is added to the logits generated + by the model prior to sampling. The exact effect will vary per model, but + values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + }, + "logprobs": False, # Optional. Default value is False. Whether to return log + probabilities of the output tokens or not. If true, returns the log probabilities + of each output token returned in the content of message. + "max_completion_tokens": 0, # Optional. The maximum number of completion + tokens that may be used over the course of the run. 
The run will make a best + effort to use only the number of completion tokens specified, across multiple + turns of the run. + "max_tokens": 0, # Optional. The maximum number of tokens that can be + generated in the completion. The token count of your prompt plus max_tokens + cannot exceed the model's context length. + "metadata": { + "str": "str" # Optional. Set of 16 key-value pairs that can be + attached to an object. This can be useful for storing additional information + about the object in a structured format. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters. + }, + "n": 1, # Optional. Default value is 1. How many chat completion choices to + generate for each input message. Note that you will be charged based on the + number of generated tokens across all of the choices. Keep n as 1 to minimize + costs. + "presence_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + "reasoning_effort": "str", # Optional. Constrains effort on reasoning for + reasoning models. Reducing reasoning effort can result in faster responses and + fewer tokens used on reasoning in a response. Known values are: "none", + "minimal", "low", "medium", "high", and "xhigh". + "seed": 0, # Optional. If specified, the system will make a best effort to + sample deterministically, such that repeated requests with the same seed and + parameters should return the same result. Determinism is not guaranteed. + "stop": {}, + "stream": False, # Optional. Default value is False. If set to true, the + model response data will be streamed to the client as it is generated using + server-sent events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data [DONE] message. 
The usage field on this chunk + shows the token usage statistics for the entire request, and the choices + field will always be an empty array. + }, + "temperature": 0.0, # Optional. What sampling temperature to use, between 0 + and 2. Higher values like 0.8 will make the output more random, while lower + values like 0.2 will make it more focused and deterministic. We generally + recommend altering this or top_p but not both. + "tool_choice": {}, + "tools": [ + { + "function": { + "name": "str", # The name of the function to be + called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. Required. + "description": "str", # Optional. A description of + what the function does, used by the model to choose when and how to + call the function. + "parameters": { + "str": {} # Optional. The parameters the + function accepts, described as a JSON Schema object. + } + }, + "type": "str" # The type of the tool. Currently, only + function is supported. Required. "function" + } + ], + "top_logprobs": 0, # Optional. An integer between 0 and 20 specifying the + number of most likely tokens to return at each token position, each with an + associated log probability. logprobs must be set to true if this parameter is + used. + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. We generally recommend altering this or + temperature but not both. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. 
stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. + "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. 
"assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. + "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. Total number of tokens used + in the request (prompt + completion). Required. + } + } + """ + + @overload + async def create_chat_completion( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a model response for the given chat conversation. + + Creates a model response for the given chat conversation. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". 
+ :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. 
+ } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. + "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. "assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. + "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. Total number of tokens used + in the request (prompt + completion). Required. 
+ } + } + """ + + @distributed_trace_async + async def create_chat_completion( + self, body: Union[JSON, IO[bytes]], **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a model response for the given chat conversation. + + Creates a model response for the given chat conversation. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "messages": [ + { + "role": "str", # The role of the message author. Required. + Known values are: "system", "developer", "user", "assistant", and "tool". + "content": "str", # Optional. The contents of the message. + "reasoning_content": "str", # Optional. The reasoning + content generated by the model (assistant messages only). + "refusal": "str", # Optional. The refusal message generated + by the model (assistant messages only). + "tool_call_id": "str", # Optional. Tool call that this + message is responding to (tool messages only). + "tool_calls": [ + { + "function": { + "arguments": "str", # The arguments + to call the function with, as generated by the model in JSON + format. Required. + "name": "str" # The name of the + function to call. Required. + }, + "id": "str", # The ID of the tool call. + Required. + "type": "str" # The type of the tool. + Currently, only function is supported. Required. "function" + } + ] + } + ], + "model": "str", # Model ID used to generate the response. Required. + "frequency_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on their existing frequency in + the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + "logit_bias": { + "str": 0 # Optional. Modify the likelihood of specified tokens + appearing in the completion. 
Accepts a JSON object that maps tokens + (specified by their token ID in the tokenizer) to an associated bias value + from -100 to 100. Mathematically, the bias is added to the logits generated + by the model prior to sampling. The exact effect will vary per model, but + values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + }, + "logprobs": False, # Optional. Default value is False. Whether to return log + probabilities of the output tokens or not. If true, returns the log probabilities + of each output token returned in the content of message. + "max_completion_tokens": 0, # Optional. The maximum number of completion + tokens that may be used over the course of the run. The run will make a best + effort to use only the number of completion tokens specified, across multiple + turns of the run. + "max_tokens": 0, # Optional. The maximum number of tokens that can be + generated in the completion. The token count of your prompt plus max_tokens + cannot exceed the model's context length. + "metadata": { + "str": "str" # Optional. Set of 16 key-value pairs that can be + attached to an object. This can be useful for storing additional information + about the object in a structured format. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters. + }, + "n": 1, # Optional. Default value is 1. How many chat completion choices to + generate for each input message. Note that you will be charged based on the + number of generated tokens across all of the choices. Keep n as 1 to minimize + costs. + "presence_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + "reasoning_effort": "str", # Optional. 
Constrains effort on reasoning for + reasoning models. Reducing reasoning effort can result in faster responses and + fewer tokens used on reasoning in a response. Known values are: "none", + "minimal", "low", "medium", "high", and "xhigh". + "seed": 0, # Optional. If specified, the system will make a best effort to + sample deterministically, such that repeated requests with the same seed and + parameters should return the same result. Determinism is not guaranteed. + "stop": {}, + "stream": False, # Optional. Default value is False. If set to true, the + model response data will be streamed to the client as it is generated using + server-sent events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data [DONE] message. The usage field on this chunk + shows the token usage statistics for the entire request, and the choices + field will always be an empty array. + }, + "temperature": 0.0, # Optional. What sampling temperature to use, between 0 + and 2. Higher values like 0.8 will make the output more random, while lower + values like 0.2 will make it more focused and deterministic. We generally + recommend altering this or top_p but not both. + "tool_choice": {}, + "tools": [ + { + "function": { + "name": "str", # The name of the function to be + called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. Required. + "description": "str", # Optional. A description of + what the function does, used by the model to choose when and how to + call the function. + "parameters": { + "str": {} # Optional. The parameters the + function accepts, described as a JSON Schema object. + } + }, + "type": "str" # The type of the tool. Currently, only + function is supported. Required. "function" + } + ], + "top_logprobs": 0, # Optional. 
An integer between 0 and 20 specifying the + number of most likely tokens to return at each token position, each with an + associated log probability. logprobs must be set to true if this parameter is + used. + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. We generally recommend altering this or + temperature but not both. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. 
+ Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. + "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. "assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. 
+ "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. Total number of tokens used + in the request (prompt + completion). Required. + } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_chat_completion_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + 
response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + async def create_image( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Generate images from text prompts. + + Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation + model with automatic prompt optimization and enhanced visual capabilities. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "model": "str", # The model to use for image generation. Required. + "n": 0, # The number of images to generate. Must be between 1 and 10. + Required. + "prompt": "str", # A text description of the desired image(s). Supports up + to 32,000 characters and provides automatic prompt optimization for best results. + Required. + "background": "str", # Optional. The background setting for the image + generation. Supported values: transparent, opaque, auto. + "moderation": "str", # Optional. The moderation setting for the image + generation. Supported values: low, auto. + "output_compression": 0, # Optional. The output compression level for the + image generation (0-100). + "output_format": "str", # Optional. 
The output format for the image + generation. Supported values: png, webp, jpeg. + "partial_images": 0, # Optional. The number of partial image chunks to + return during streaming generation. Defaults to 0. When stream=true, this must be + greater than 0 to receive progressive updates of the image as it is being + generated. + "quality": "str", # Optional. The quality of the image that will be + generated. Supported values: auto, high, medium, low. + "size": "str", # Optional. The size of the generated images. GPT-IMAGE-1 + supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536 + (portrait). Known values are: "auto", "1536x1024", and "1024x1536". + "stream": False, # Optional. Default value is False. If set to true, partial + image data will be streamed as the image is being generated. The response will be + sent as server-sent events with partial image chunks. When stream is true, + partial_images must be greater than 0. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the images were + created. Required. + "data": [ + { + "b64_json": "str", # The base64-encoded JSON of the + generated image. Required. + "revised_prompt": "str" # Optional. The optimized prompt + that was used to generate the image. + } + ], + "background": "str", # Optional. The background setting used for the image + generation. + "output_format": "str", # Optional. The output format of the generated + image. + "quality": "str", # Optional. The quality setting used for the image + generation. + "size": "str", # Optional. The size of the generated image. + "usage": { + "input_tokens": 0, # The number of tokens (images and text) in the + input prompt. Required. + "input_tokens_details": { + "image_tokens": 0, # The number of image tokens in the input + prompt. 
Required. + "text_tokens": 0 # The number of text tokens in the input + prompt. Required. + }, + "output_tokens": 0, # The number of image tokens in the output + image. Required. + "total_tokens": 0 # The total number of tokens (images and text) + used for the image generation. Required. + } + } + """ + + @overload + async def create_image( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + """Generate images from text prompts. + + Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation + model with automatic prompt optimization and enhanced visual capabilities. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the images were + created. Required. + "data": [ + { + "b64_json": "str", # The base64-encoded JSON of the + generated image. Required. + "revised_prompt": "str" # Optional. The optimized prompt + that was used to generate the image. + } + ], + "background": "str", # Optional. The background setting used for the image + generation. + "output_format": "str", # Optional. The output format of the generated + image. + "quality": "str", # Optional. The quality setting used for the image + generation. + "size": "str", # Optional. The size of the generated image. + "usage": { + "input_tokens": 0, # The number of tokens (images and text) in the + input prompt. Required. + "input_tokens_details": { + "image_tokens": 0, # The number of image tokens in the input + prompt. Required. + "text_tokens": 0 # The number of text tokens in the input + prompt. Required. 
+ }, + "output_tokens": 0, # The number of image tokens in the output + image. Required. + "total_tokens": 0 # The total number of tokens (images and text) + used for the image generation. Required. + } + } + """ + + @distributed_trace_async + async def create_image(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Generate images from text prompts. + + Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation + model with automatic prompt optimization and enhanced visual capabilities. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "model": "str", # The model to use for image generation. Required. + "n": 0, # The number of images to generate. Must be between 1 and 10. + Required. + "prompt": "str", # A text description of the desired image(s). Supports up + to 32,000 characters and provides automatic prompt optimization for best results. + Required. + "background": "str", # Optional. The background setting for the image + generation. Supported values: transparent, opaque, auto. + "moderation": "str", # Optional. The moderation setting for the image + generation. Supported values: low, auto. + "output_compression": 0, # Optional. The output compression level for the + image generation (0-100). + "output_format": "str", # Optional. The output format for the image + generation. Supported values: png, webp, jpeg. + "partial_images": 0, # Optional. The number of partial image chunks to + return during streaming generation. Defaults to 0. When stream=true, this must be + greater than 0 to receive progressive updates of the image as it is being + generated. + "quality": "str", # Optional. 
The quality of the image that will be + generated. Supported values: auto, high, medium, low. + "size": "str", # Optional. The size of the generated images. GPT-IMAGE-1 + supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536 + (portrait). Known values are: "auto", "1536x1024", and "1024x1536". + "stream": False, # Optional. Default value is False. If set to true, partial + image data will be streamed as the image is being generated. The response will be + sent as server-sent events with partial image chunks. When stream is true, + partial_images must be greater than 0. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the images were + created. Required. + "data": [ + { + "b64_json": "str", # The base64-encoded JSON of the + generated image. Required. + "revised_prompt": "str" # Optional. The optimized prompt + that was used to generate the image. + } + ], + "background": "str", # Optional. The background setting used for the image + generation. + "output_format": "str", # Optional. The output format of the generated + image. + "quality": "str", # Optional. The quality setting used for the image + generation. + "size": "str", # Optional. The size of the generated image. + "usage": { + "input_tokens": 0, # The number of tokens (images and text) in the + input prompt. Required. + "input_tokens_details": { + "image_tokens": 0, # The number of image tokens in the input + prompt. Required. + "text_tokens": 0 # The number of text tokens in the input + prompt. Required. + }, + "output_tokens": 0, # The number of image tokens in the output + image. Required. + "total_tokens": 0 # The total number of tokens (images and text) + used for the image generation. Required. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_image_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: 
+ return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def list_models(self, **kwargs: Any) -> JSON: + """List available models. + + Lists the currently available models, and provides basic information about each one such as the + owner and availability. + + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "data": [ + { + "created": 0, # The Unix timestamp (in seconds) when the + model was created. Required. + "id": "str", # The model identifier, which can be referenced + in the API endpoints. Required. + "object": "str", # The object type, which is always "model". + Required. "model" + "owned_by": "str" # The organization that owns the model. + Required. + } + ], + "object": "str" # The object type, which is always "list". Required. 
"list" + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_inference_list_models_request( + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + async def create_response( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Send Prompt to a Model Using the 
Responses API. + + Generate text responses from text prompts. This endpoint supports both streaming and + non-streaming responses for supported text models. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "input": {}, + "model": "str", # The model ID of the model you want to use. Get the model + ID using ``/v1/models`` or on the available models page. Required. + "instructions": "str", # Optional. System-level instructions for the model. + This sets the behavior and context for the response generation. + "max_output_tokens": 0, # Optional. The maximum number of tokens to generate + in the response. + "metadata": { + "str": "str" # Optional. Set of key-value pairs that can be attached + to the request. + }, + "stop": {}, + "stream": False, # Optional. Default value is False. Set to true to stream + partial responses as Server-Sent Events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data: [DONE] message with token usage statistics for + the entire request. + }, + "temperature": 0.0, # Optional. A value between 0.0 and 2.0 to control + randomness and creativity. Lower values like 0.2 make the output more focused and + deterministic, while higher values like 0.8 make it more random. + "tool_choice": {}, + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function to be + called. + "parameters": { + "str": {} # Optional. 
The parameters the function + accepts, described as a JSON Schema object. + } + } + ], + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. + "user": "str" # Optional. A unique identifier representing your end-user. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the response was + created. Required. + "id": "str", # A unique identifier for the response. Required. + "model": "str", # The model used to generate the response. Required. + "object": "str", # The object type, which is always ``response``. Required. + "response" + "output": [ + { + "content": [ + { + "text": "str", # The text content. Required. + "type": "str" # The type of content part. + ``reasoning_text`` for reasoning content, ``output_text`` for + final output text. Required. Known values are: "reasoning_text" + and "output_text". + } + ], + "type": "str", # The type of output item. One of + ``reasoning``"" , ``message``"" , or ``function_call``. Required. Known + values are: "reasoning", "message", and "function_call". + "arguments": "str", # Optional. JSON string of function + arguments (present when type is ``function_call``"" ). + "call_id": "str", # Optional. The unique ID of the function + tool call (present when type is ``function_call``"" ). + "id": "str", # Optional. The unique ID of the output item. + "name": "str", # Optional. The name of the function to call + (present when type is ``function_call``"" ). + "role": "str", # Optional. The role associated with this + output item (typically ``assistant``"" ). + "status": "str" # Optional. Status of the item. + } + ], + "usage": { + "input_tokens": 0, # The number of input tokens. Required. + "input_tokens_details": { + "cached_tokens": 0 # The number of tokens that were + retrieved from the cache. Required. 
+ }, + "output_tokens": 0, # The number of output tokens. Required. + "output_tokens_details": { + "reasoning_tokens": 0, # The number of reasoning tokens. + Required. + "tool_output_tokens": 0 # The number of tool output tokens. + Required. + }, + "total_tokens": 0 # The total number of tokens used. Required. + }, + "max_output_tokens": 0, # Optional. Maximum output tokens setting. + "parallel_tool_calls": bool, # Optional. Whether parallel tool calls are + enabled. + "status": "str", # Optional. Status of the response. + "temperature": 0.0, # Optional. Temperature setting used for the response. + "tool_choice": "str", # Optional. Tool choice setting used for the response. + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function. + "parameters": { + "str": {} # Optional. The parameters the function + accepts. + } + } + ], + "top_p": 0.0, # Optional. Top-p setting used for the response. + "user": "str" # Optional. User identifier. + } + """ + + @overload + async def create_response( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Send Prompt to a Model Using the Responses API. + + Generate text responses from text prompts. This endpoint supports both streaming and + non-streaming responses for supported text models. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the response was + created. Required. 
+ "id": "str", # A unique identifier for the response. Required. + "model": "str", # The model used to generate the response. Required. + "object": "str", # The object type, which is always ``response``. Required. + "response" + "output": [ + { + "content": [ + { + "text": "str", # The text content. Required. + "type": "str" # The type of content part. + ``reasoning_text`` for reasoning content, ``output_text`` for + final output text. Required. Known values are: "reasoning_text" + and "output_text". + } + ], + "type": "str", # The type of output item. One of + ``reasoning``"" , ``message``"" , or ``function_call``. Required. Known + values are: "reasoning", "message", and "function_call". + "arguments": "str", # Optional. JSON string of function + arguments (present when type is ``function_call``"" ). + "call_id": "str", # Optional. The unique ID of the function + tool call (present when type is ``function_call``"" ). + "id": "str", # Optional. The unique ID of the output item. + "name": "str", # Optional. The name of the function to call + (present when type is ``function_call``"" ). + "role": "str", # Optional. The role associated with this + output item (typically ``assistant``"" ). + "status": "str" # Optional. Status of the item. + } + ], + "usage": { + "input_tokens": 0, # The number of input tokens. Required. + "input_tokens_details": { + "cached_tokens": 0 # The number of tokens that were + retrieved from the cache. Required. + }, + "output_tokens": 0, # The number of output tokens. Required. + "output_tokens_details": { + "reasoning_tokens": 0, # The number of reasoning tokens. + Required. + "tool_output_tokens": 0 # The number of tool output tokens. + Required. + }, + "total_tokens": 0 # The total number of tokens used. Required. + }, + "max_output_tokens": 0, # Optional. Maximum output tokens setting. + "parallel_tool_calls": bool, # Optional. Whether parallel tool calls are + enabled. + "status": "str", # Optional. Status of the response. 
+ "temperature": 0.0, # Optional. Temperature setting used for the response. + "tool_choice": "str", # Optional. Tool choice setting used for the response. + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function. + "parameters": { + "str": {} # Optional. The parameters the function + accepts. + } + } + ], + "top_p": 0.0, # Optional. Top-p setting used for the response. + "user": "str" # Optional. User identifier. + } + """ + + @distributed_trace_async + async def create_response( + self, body: Union[JSON, IO[bytes]], **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Send Prompt to a Model Using the Responses API. + + Generate text responses from text prompts. This endpoint supports both streaming and + non-streaming responses for supported text models. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "input": {}, + "model": "str", # The model ID of the model you want to use. Get the model + ID using ``/v1/models`` or on the available models page. Required. + "instructions": "str", # Optional. System-level instructions for the model. + This sets the behavior and context for the response generation. + "max_output_tokens": 0, # Optional. The maximum number of tokens to generate + in the response. + "metadata": { + "str": "str" # Optional. Set of key-value pairs that can be attached + to the request. + }, + "stop": {}, + "stream": False, # Optional. Default value is False. Set to true to stream + partial responses as Server-Sent Events. + "stream_options": { + "include_usage": bool # Optional. 
If set, an additional chunk will + be streamed before the data: [DONE] message with token usage statistics for + the entire request. + }, + "temperature": 0.0, # Optional. A value between 0.0 and 2.0 to control + randomness and creativity. Lower values like 0.2 make the output more focused and + deterministic, while higher values like 0.8 make it more random. + "tool_choice": {}, + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function to be + called. + "parameters": { + "str": {} # Optional. The parameters the function + accepts, described as a JSON Schema object. + } + } + ], + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. + "user": "str" # Optional. A unique identifier representing your end-user. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the response was + created. Required. + "id": "str", # A unique identifier for the response. Required. + "model": "str", # The model used to generate the response. Required. + "object": "str", # The object type, which is always ``response``. Required. + "response" + "output": [ + { + "content": [ + { + "text": "str", # The text content. Required. + "type": "str" # The type of content part. + ``reasoning_text`` for reasoning content, ``output_text`` for + final output text. Required. Known values are: "reasoning_text" + and "output_text". + } + ], + "type": "str", # The type of output item. One of + ``reasoning``"" , ``message``"" , or ``function_call``. Required. Known + values are: "reasoning", "message", and "function_call". + "arguments": "str", # Optional. JSON string of function + arguments (present when type is ``function_call``"" ). 
+ "call_id": "str", # Optional. The unique ID of the function + tool call (present when type is ``function_call``"" ). + "id": "str", # Optional. The unique ID of the output item. + "name": "str", # Optional. The name of the function to call + (present when type is ``function_call``"" ). + "role": "str", # Optional. The role associated with this + output item (typically ``assistant``"" ). + "status": "str" # Optional. Status of the item. + } + ], + "usage": { + "input_tokens": 0, # The number of input tokens. Required. + "input_tokens_details": { + "cached_tokens": 0 # The number of tokens that were + retrieved from the cache. Required. + }, + "output_tokens": 0, # The number of output tokens. Required. + "output_tokens_details": { + "reasoning_tokens": 0, # The number of reasoning tokens. + Required. + "tool_output_tokens": 0 # The number of tool output tokens. + Required. + }, + "total_tokens": 0 # The total number of tokens used. Required. + }, + "max_output_tokens": 0, # Optional. Maximum output tokens setting. + "parallel_tool_calls": bool, # Optional. Whether parallel tool calls are + enabled. + "status": "str", # Optional. Status of the response. + "temperature": 0.0, # Optional. Temperature setting used for the response. + "tool_choice": "str", # Optional. Tool choice setting used for the response. + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function. + "parameters": { + "str": {} # Optional. The parameters the function + accepts. + } + } + ], + "top_p": 0.0, # Optional. Top-p setting used for the response. + "user": "str" # Optional. User identifier. 
+ } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_response_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + 
@overload
async def create_async_invoke(
    self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
) -> JSON:
    # pylint: disable=line-too-long
    """Generate Image, Audio, or Text-to-Speech Using fal Models (JSON body).

    Starts an asynchronous generation job and returns immediately with a
    ``request_id``. The job status is ``QUEUED`` initially; use the
    ``request_id`` to poll for the result.

    :param body: The invocation request, shaped like the template below. Required.
    :type body: JSON
    :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
     Default value is "application/json".
    :paramtype content_type: str
    :return: JSON object
    :rtype: JSON
    :raises ~azure.core.exceptions.HttpResponseError:

    Example:
        .. code-block:: python

            # JSON input template you can fill out and use as your body input.
            body = {
                "input": {
                    "enable_safety_checker": bool,  # Optional. Whether to enable the
                      safety checker for generated content.
                    "guidance_scale": 0.0,  # Optional. Controls how closely the image
                      generation model follows the prompt. Higher values produce output
                      more closely matching the prompt.
                    "num_images": 0,  # Optional. The number of images to generate.
                    "num_inference_steps": 0,  # Optional. The number of inference
                      steps to use during image generation. More steps generally produce
                      higher quality output but take longer.
                    "output_format": "str",  # Optional. The desired output format or
                      aspect ratio for image generation.
                    "prompt": "str",  # Optional. The text prompt describing the
                      desired output. Used for image generation and audio generation
                      models.
                    "seconds_total": 0,  # Optional. The total duration in seconds for
                      generated audio. Used for audio generation models.
                    "text": "str"  # Optional. The text content to convert to speech.
                      Used for text-to-speech models.
                },
                "model_id": "str",  # The ID of the model to invoke asynchronously.
                  Required.
                "tags": [
                    {
                        "key": "str",  # The tag key. Required.
                        "value": "str"  # The tag value. Required.
                    }
                ]
            }

            # response body for status code(s): 202
            response == {
                "created_at": "2020-02-20 00:00:00",  # The timestamp when the request
                  was created. Required.
                "model_id": "str",  # The model ID that was invoked. Required.
                "request_id": "str",  # A unique identifier for the async invocation
                  request. Use this ID to check the status and retrieve the result.
                  Required.
                "status": "str",  # The current status of the async invocation.
                  Required. Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and
                  "FAILED".
                "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when
                  the job completed. Null until finished.
                "error": "str",  # Optional. Error message if the job failed. Null on
                  success.
                "output": {
                    "str": {}  # Optional. The output of the invocation. Null while
                      the job is queued or in progress. Contains the result once
                      completed.
                },
                "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the
                  job started processing. Null while queued.
            }
    """


@overload
async def create_async_invoke(
    self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
) -> JSON:
    # pylint: disable=line-too-long
    """Generate Image, Audio, or Text-to-Speech Using fal Models (binary body).

    Starts an asynchronous generation job and returns immediately with a
    ``request_id``. The job status is ``QUEUED`` initially; use the
    ``request_id`` to poll for the result.

    :param body: The invocation request supplied as a binary stream. Required.
    :type body: IO[bytes]
    :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
     Default value is "application/json".
    :paramtype content_type: str
    :return: JSON object
    :rtype: JSON
    :raises ~azure.core.exceptions.HttpResponseError:

    Example:
        .. code-block:: python

            # response body for status code(s): 202
            response == {
                "created_at": "2020-02-20 00:00:00",  # The timestamp when the request
                  was created. Required.
                "model_id": "str",  # The model ID that was invoked. Required.
                "request_id": "str",  # A unique identifier for the async invocation
                  request. Use this ID to check the status and retrieve the result.
                  Required.
                "status": "str",  # The current status of the async invocation.
                  Required. Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and
                  "FAILED".
                "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when
                  the job completed. Null until finished.
                "error": "str",  # Optional. Error message if the job failed. Null on
                  success.
                "output": {
                    "str": {}  # Optional. The output of the invocation. Null while
                      the job is queued or in progress. Contains the result once
                      completed.
                },
                "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the
                  job started processing. Null while queued.
            }
    """


@distributed_trace_async
async def create_async_invoke(
    self, body: Union[JSON, IO[bytes]], **kwargs: Any
) -> JSON:
    # pylint: disable=line-too-long
    """Generate Image, Audio, or Text-to-Speech Using fal Models.

    Starts an asynchronous generation job and returns immediately with a
    ``request_id``. The job status is ``QUEUED`` initially; use the
    ``request_id`` to poll for the result.

    Only a 202 response is treated as success. Any other status is passed to
    ``map_error`` together with the error map and, if that call returns,
    raised as ``HttpResponseError``.

    :param body: Is either a JSON type or an IO[bytes] type. Required.
    :type body: JSON or IO[bytes]
    :keyword content_type: Request content type. Taken from this keyword, else
     from a ``Content-Type`` entry in ``headers``, else "application/json".
    :paramtype content_type: str
    :keyword headers: Extra request headers (handled case-insensitively).
    :keyword params: Extra query parameters.
    :keyword error_map: Status-code to exception overrides merged over the
     defaults for 404, 409, 304, 401, 429 and 500.
    :keyword cls: Optional callable invoked with the pipeline response, the
     deserialized body and the parsed ``ratelimit-*`` headers; when given, its
     result is returned instead of the deserialized body.
    :return: JSON object
    :rtype: JSON
    :raises ~azure.core.exceptions.HttpResponseError:

    Example:
        .. code-block:: python

            # JSON input template you can fill out and use as your body input.
            body = {
                "input": {
                    "enable_safety_checker": bool,  # Optional. Whether to enable the
                      safety checker for generated content.
                    "guidance_scale": 0.0,  # Optional. Controls how closely the image
                      generation model follows the prompt. Higher values produce output
                      more closely matching the prompt.
                    "num_images": 0,  # Optional. The number of images to generate.
                    "num_inference_steps": 0,  # Optional. The number of inference
                      steps to use during image generation. More steps generally produce
                      higher quality output but take longer.
                    "output_format": "str",  # Optional. The desired output format or
                      aspect ratio for image generation.
                    "prompt": "str",  # Optional. The text prompt describing the
                      desired output. Used for image generation and audio generation
                      models.
                    "seconds_total": 0,  # Optional. The total duration in seconds for
                      generated audio. Used for audio generation models.
                    "text": "str"  # Optional. The text content to convert to speech.
                      Used for text-to-speech models.
                },
                "model_id": "str",  # The ID of the model to invoke asynchronously.
                  Required.
                "tags": [
                    {
                        "key": "str",  # The tag key. Required.
                        "value": "str"  # The tag value. Required.
                    }
                ]
            }

            # response body for status code(s): 202
            response == {
                "created_at": "2020-02-20 00:00:00",  # The timestamp when the request
                  was created. Required.
                "model_id": "str",  # The model ID that was invoked. Required.
                "request_id": "str",  # A unique identifier for the async invocation
                  request. Use this ID to check the status and retrieve the result.
                  Required.
                "status": "str",  # The current status of the async invocation.
                  Required. Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and
                  "FAILED".
                "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when
                  the job completed. Null until finished.
                "error": "str",  # Optional. Error message if the job failed. Null on
                  success.
                "output": {
                    "str": {}  # Optional. The output of the invocation. Null while
                      the job is queued or in progress. Contains the result once
                      completed.
                },
                "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the
                  job started processing. Null while queued.
            }
    """

    def _authentication_error(response):
        # Factory stored in the error map for 401 responses.
        return ClientAuthenticationError(response=response)

    error_map: MutableMapping[int, Type[HttpResponseError]] = {
        404: ResourceNotFoundError,
        409: ResourceExistsError,
        304: ResourceNotModifiedError,
        401: cast(Type[HttpResponseError], _authentication_error),
        429: HttpResponseError,
        500: HttpResponseError,
    }
    # Caller-supplied entries win over the defaults above.
    error_map.update(kwargs.pop("error_map", {}) or {})

    request_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
    query_params = kwargs.pop("params", {}) or {}

    # The Content-Type header is removed from the header mapping in every case,
    # even when an explicit ``content_type`` keyword takes precedence over it.
    header_content_type = request_headers.pop("Content-Type", None)
    content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
    cls: ClsType[JSON] = kwargs.pop("cls", None)
    if not content_type:
        content_type = "application/json"

    # Streams and raw bytes are sent as-is; anything else goes out as JSON.
    if isinstance(body, (IOBase, bytes)):
        json_payload, raw_payload = None, body
    else:
        json_payload, raw_payload = body, None

    request = build_inference_create_async_invoke_request(
        content_type=content_type,
        json=json_payload,
        content=raw_payload,
        headers=request_headers,
        params=query_params,
    )
    request.url = self._client.format_url(request.url)

    stream_response = False
    pipeline_response: PipelineResponse = (
        await self._client._pipeline.run(  # pylint: disable=protected-access
            request, stream=stream_response, **kwargs
        )
    )
    response = pipeline_response.http_response

    if response.status_code != 202:
        if stream_response:
            await response.read()  # Load the body in memory and close the socket
        map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
        raise HttpResponseError(response=response)

    # Rate-limit headers are parsed as ints, in this fixed order.
    rate_limit_headers = {
        header_name: self._deserialize("int", response.headers.get(header_name))
        for header_name in (
            "ratelimit-limit",
            "ratelimit-remaining",
            "ratelimit-reset",
        )
    }

    # An empty response body yields None rather than a JSON parse attempt.
    payload = response.json() if response.content else None

    if cls:
        return cls(pipeline_response, cast(JSON, payload), rate_limit_headers)  # type: ignore
    return cast(JSON, payload)  # type: ignore
The reasoning + content generated by the model (assistant messages only). + "refusal": "str", # Optional. The refusal message generated + by the model (assistant messages only). + "tool_call_id": "str", # Optional. Tool call that this + message is responding to (tool messages only). + "tool_calls": [ + { + "function": { + "arguments": "str", # The arguments + to call the function with, as generated by the model in JSON + format. Required. + "name": "str" # The name of the + function to call. Required. + }, + "id": "str", # The ID of the tool call. + Required. + "type": "str" # The type of the tool. + Currently, only function is supported. Required. "function" + } + ] + } + ], + "model": "str", # Model ID used to generate the response. Required. + "frequency_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on their existing frequency in + the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + "logit_bias": { + "str": 0 # Optional. Modify the likelihood of specified tokens + appearing in the completion. Accepts a JSON object that maps tokens + (specified by their token ID in the tokenizer) to an associated bias value + from -100 to 100. Mathematically, the bias is added to the logits generated + by the model prior to sampling. The exact effect will vary per model, but + values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + }, + "logprobs": False, # Optional. Default value is False. Whether to return log + probabilities of the output tokens or not. If true, returns the log probabilities + of each output token returned in the content of message. + "max_completion_tokens": 0, # Optional. The maximum number of completion + tokens that may be used over the course of the run. 
The run will make a best + effort to use only the number of completion tokens specified, across multiple + turns of the run. + "max_tokens": 0, # Optional. The maximum number of tokens that can be + generated in the completion. The token count of your prompt plus max_tokens + cannot exceed the model's context length. + "metadata": { + "str": "str" # Optional. Set of 16 key-value pairs that can be + attached to an object. This can be useful for storing additional information + about the object in a structured format. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters. + }, + "n": 1, # Optional. Default value is 1. How many chat completion choices to + generate for each input message. Note that you will be charged based on the + number of generated tokens across all of the choices. Keep n as 1 to minimize + costs. + "presence_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + "reasoning_effort": "str", # Optional. Constrains effort on reasoning for + reasoning models. Reducing reasoning effort can result in faster responses and + fewer tokens used on reasoning in a response. Known values are: "none", + "minimal", "low", "medium", "high", and "xhigh". + "seed": 0, # Optional. If specified, the system will make a best effort to + sample deterministically, such that repeated requests with the same seed and + parameters should return the same result. Determinism is not guaranteed. + "stop": {}, + "stream": False, # Optional. Default value is False. If set to true, the + model response data will be streamed to the client as it is generated using + server-sent events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data [DONE] message. 
The usage field on this chunk + shows the token usage statistics for the entire request, and the choices + field will always be an empty array. + }, + "temperature": 0.0, # Optional. What sampling temperature to use, between 0 + and 2. Higher values like 0.8 will make the output more random, while lower + values like 0.2 will make it more focused and deterministic. We generally + recommend altering this or top_p but not both. + "tool_choice": {}, + "tools": [ + { + "function": { + "name": "str", # The name of the function to be + called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. Required. + "description": "str", # Optional. A description of + what the function does, used by the model to choose when and how to + call the function. + "parameters": { + "str": {} # Optional. The parameters the + function accepts, described as a JSON Schema object. + } + }, + "type": "str" # The type of the tool. Currently, only + function is supported. Required. "function" + } + ], + "top_logprobs": 0, # Optional. An integer between 0 and 20 specifying the + number of most likely tokens to return at each token position, each with an + associated log probability. logprobs must be set to true if this parameter is + used. + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. We generally recommend altering this or + temperature but not both. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. 
stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. + "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. 
@overload
async def create_chat_completion(
    self,
    body: IO[bytes],
    *,
    agent: bool = True,
    content_type: str = "application/json",
    **kwargs: Any
) -> JSON:
    # pylint: disable=line-too-long
    """Create a model response for the given chat conversation (binary body).

    Creates a model response for the given chat conversation via a
    customer-provisioned agent endpoint, with the request supplied as a
    binary stream.

    :param body: The chat completion request as a binary stream. Required.
    :type body: IO[bytes]
    :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
    :paramtype agent: bool
    :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
     Default value is "application/json".
    :paramtype content_type: str
    :return: JSON object
    :rtype: JSON
    :raises ~azure.core.exceptions.HttpResponseError:

    Example:
        .. code-block:: python

            # response body for status code(s): 200
            response == {
                "choices": [
                    {
                        "finish_reason": "str",  # The reason the model stopped
                          generating tokens. stop if the model hit a natural stop point
                          or a provided stop sequence, length if the maximum number of
                          tokens specified in the request was reached, tool_calls if the
                          model called a tool. Required. Known values are: "stop",
                          "length", "tool_calls", and "content_filter".
                        "index": 0,  # The index of the choice in the list of choices.
                          Required.
                        "logprobs": {
                            "content": [
                                {
                                    "bytes": [
                                        0  # A list of integers representing the
                                          UTF-8 bytes representation of the token. Can
                                          be null if there is no bytes representation
                                          for the token. Required.
                                    ],
                                    "logprob": 0.0,  # The log probability of this
                                      token, if it is within the top 20 most likely
                                      tokens. Otherwise, the value -9999.0 is used to
                                      signify that the token is very unlikely. Required.
                                    "token": "str",  # The token. Required.
                                    "top_logprobs": [
                                        {
                                            "bytes": [
                                                0  # Required.
                                            ],
                                            "logprob": 0.0,  # The log probability
                                              of this token. Required.
                                            "token": "str"  # The token. Required.
                                        }
                                    ]
                                }
                            ],
                            "refusal": [
                                {
                                    "bytes": [
                                        0  # A list of integers representing the
                                          UTF-8 bytes representation of the token. Can
                                          be null if there is no bytes representation
                                          for the token. Required.
                                    ],
                                    "logprob": 0.0,  # The log probability of this
                                      token, if it is within the top 20 most likely
                                      tokens. Otherwise, the value -9999.0 is used to
                                      signify that the token is very unlikely. Required.
                                    "token": "str",  # The token. Required.
                                    "top_logprobs": [
                                        {
                                            "bytes": [
                                                0  # Required.
                                            ],
                                            "logprob": 0.0,  # The log probability
                                              of this token. Required.
                                            "token": "str"  # The token. Required.
                                        }
                                    ]
                                }
                            ]
                        },
                        "message": {
                            "content": "str",  # The contents of the message.
                              Required.
                            "reasoning_content": "str",  # The reasoning content
                              generated by the model. Required.
                            "refusal": "str",  # The refusal message generated by the
                              model. Required.
                            "role": "str",  # The role of the author of this message.
                              Required. "assistant"
                            "tool_calls": [
                                {
                                    "function": {
                                        "arguments": "str",  # The arguments to
                                          call the function with. Required.
                                        "name": "str"  # The name of the function
                                          to call. Required.
                                    },
                                    "id": "str",  # The ID of the tool call.
                                      Required.
                                    "type": "str"  # The type of the tool.
                                      Required. "function"
                                }
                            ]
                        }
                    }
                ],
                "created": 0,  # The Unix timestamp (in seconds) of when the chat
                  completion was created. Required.
                "id": "str",  # A unique identifier for the chat completion. Required.
                "model": "str",  # The model used for the chat completion. Required.
                "object": "str",  # The object type, which is always chat.completion.
                  Required. "chat.completion"
                "usage": {
                    "cache_created_input_tokens": 0,  # Default value is 0. Number of
                      prompt tokens written to cache. Required.
                    "cache_creation": {
                        "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
                          of prompt tokens written to 1h cache. Required.
                        "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
                          of prompt tokens written to 5m cache. Required.
                    },
                    "cache_read_input_tokens": 0,  # Default value is 0. Number of
                      prompt tokens read from cache. Required.
                    "completion_tokens": 0,  # Default value is 0. Number of tokens in
                      the generated completion. Required.
                    "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
                      prompt. Required.
                    "total_tokens": 0  # Default value is 0. Total number of tokens
                      used in the request (prompt + completion). Required.
                }
            }
    """
Positive values penalize new tokens based on their existing frequency in + the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + "logit_bias": { + "str": 0 # Optional. Modify the likelihood of specified tokens + appearing in the completion. Accepts a JSON object that maps tokens + (specified by their token ID in the tokenizer) to an associated bias value + from -100 to 100. Mathematically, the bias is added to the logits generated + by the model prior to sampling. The exact effect will vary per model, but + values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + }, + "logprobs": False, # Optional. Default value is False. Whether to return log + probabilities of the output tokens or not. If true, returns the log probabilities + of each output token returned in the content of message. + "max_completion_tokens": 0, # Optional. The maximum number of completion + tokens that may be used over the course of the run. The run will make a best + effort to use only the number of completion tokens specified, across multiple + turns of the run. + "max_tokens": 0, # Optional. The maximum number of tokens that can be + generated in the completion. The token count of your prompt plus max_tokens + cannot exceed the model's context length. + "metadata": { + "str": "str" # Optional. Set of 16 key-value pairs that can be + attached to an object. This can be useful for storing additional information + about the object in a structured format. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters. + }, + "n": 1, # Optional. Default value is 1. How many chat completion choices to + generate for each input message. Note that you will be charged based on the + number of generated tokens across all of the choices. Keep n as 1 to minimize + costs. 
+ "presence_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + "reasoning_effort": "str", # Optional. Constrains effort on reasoning for + reasoning models. Reducing reasoning effort can result in faster responses and + fewer tokens used on reasoning in a response. Known values are: "none", + "minimal", "low", "medium", "high", and "xhigh". + "seed": 0, # Optional. If specified, the system will make a best effort to + sample deterministically, such that repeated requests with the same seed and + parameters should return the same result. Determinism is not guaranteed. + "stop": {}, + "stream": False, # Optional. Default value is False. If set to true, the + model response data will be streamed to the client as it is generated using + server-sent events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data [DONE] message. The usage field on this chunk + shows the token usage statistics for the entire request, and the choices + field will always be an empty array. + }, + "temperature": 0.0, # Optional. What sampling temperature to use, between 0 + and 2. Higher values like 0.8 will make the output more random, while lower + values like 0.2 will make it more focused and deterministic. We generally + recommend altering this or top_p but not both. + "tool_choice": {}, + "tools": [ + { + "function": { + "name": "str", # The name of the function to be + called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. Required. + "description": "str", # Optional. A description of + what the function does, used by the model to choose when and how to + call the function. + "parameters": { + "str": {} # Optional. The parameters the + function accepts, described as a JSON Schema object. 
+ } + }, + "type": "str" # The type of the tool. Currently, only + function is supported. Required. "function" + } + ], + "top_logprobs": 0, # Optional. An integer between 0 and 20 specifying the + number of most likely tokens to return at each token position, each with an + associated log probability. logprobs must be set to true if this parameter is + used. + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. We generally recommend altering this or + temperature but not both. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. 
+ } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. + "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. "assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. 
Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. + "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. Total number of tokens used + in the request (prompt + completion). Required. + } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_agent_inference_create_chat_completion_request( + agent=agent, + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + 
def build_inference_create_chat_completion_request(  # pylint: disable=name-too-long
    **kwargs: Any,
) -> HttpRequest:
    """Assemble the ``POST /v1/chat/completions`` request.

    :keyword headers: Optional caller-supplied headers. ``Content-Type`` and ``Accept`` are
     removed from this mapping and written back after passing through ``_SERIALIZER.header``.
    :keyword content_type: Optional body content type. Falls back to the ``Content-Type``
     entry of ``headers``; when neither is present no ``Content-Type`` header is set.
    :return: The request object; all remaining keyword arguments are forwarded to
     ``HttpRequest`` unchanged.
    :rtype: HttpRequest
    """
    header_map = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    # The header entry is always removed, even when the keyword overrides its value.
    header_content_type = header_map.pop("Content-Type", None)
    content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
    accept_value = header_map.pop("Accept", "application/json")

    if content_type is not None:
        header_map["Content-Type"] = _SERIALIZER.header(
            "content_type", content_type, "str"
        )
    header_map["Accept"] = _SERIALIZER.header("accept", accept_value, "str")

    return HttpRequest(
        method="POST", url="/v1/chat/completions", headers=header_map, **kwargs
    )


def build_inference_create_image_request(**kwargs: Any) -> HttpRequest:
    """Assemble the ``POST /v1/images/generations`` request.

    :keyword headers: Optional caller-supplied headers. ``Content-Type`` and ``Accept`` are
     removed from this mapping and written back after passing through ``_SERIALIZER.header``.
    :keyword content_type: Optional body content type. Falls back to the ``Content-Type``
     entry of ``headers``; when neither is present no ``Content-Type`` header is set.
    :return: The request object; all remaining keyword arguments are forwarded to
     ``HttpRequest`` unchanged.
    :rtype: HttpRequest
    """
    header_map = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    # The header entry is always removed, even when the keyword overrides its value.
    header_content_type = header_map.pop("Content-Type", None)
    content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
    accept_value = header_map.pop("Accept", "application/json")

    if content_type is not None:
        header_map["Content-Type"] = _SERIALIZER.header(
            "content_type", content_type, "str"
        )
    header_map["Accept"] = _SERIALIZER.header("accept", accept_value, "str")

    return HttpRequest(
        method="POST", url="/v1/images/generations", headers=header_map, **kwargs
    )


def build_inference_list_models_request(**kwargs: Any) -> HttpRequest:
    """Assemble the ``GET /v1/models`` request.

    :keyword headers: Optional caller-supplied headers. ``Accept`` defaults to
     ``application/json`` and is written back after passing through ``_SERIALIZER.header``.
    :return: The request object; all remaining keyword arguments are forwarded to
     ``HttpRequest`` unchanged.
    :rtype: HttpRequest
    """
    header_map = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    accept_value = header_map.pop("Accept", "application/json")
    header_map["Accept"] = _SERIALIZER.header("accept", accept_value, "str")

    return HttpRequest(method="GET", url="/v1/models", headers=header_map, **kwargs)


def build_inference_create_response_request(**kwargs: Any) -> HttpRequest:
    """Assemble the ``POST /v1/responses`` request.

    :keyword headers: Optional caller-supplied headers. ``Content-Type`` and ``Accept`` are
     removed from this mapping and written back after passing through ``_SERIALIZER.header``.
    :keyword content_type: Optional body content type. Falls back to the ``Content-Type``
     entry of ``headers``; when neither is present no ``Content-Type`` header is set.
    :return: The request object; all remaining keyword arguments are forwarded to
     ``HttpRequest`` unchanged.
    :rtype: HttpRequest
    """
    header_map = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    # The header entry is always removed, even when the keyword overrides its value.
    header_content_type = header_map.pop("Content-Type", None)
    content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
    accept_value = header_map.pop("Accept", "application/json")

    if content_type is not None:
        header_map["Content-Type"] = _SERIALIZER.header(
            "content_type", content_type, "str"
        )
    header_map["Accept"] = _SERIALIZER.header("accept", accept_value, "str")

    return HttpRequest(method="POST", url="/v1/responses", headers=header_map, **kwargs)


def build_inference_create_async_invoke_request(  # pylint: disable=name-too-long
    **kwargs: Any,
) -> HttpRequest:
    """Assemble the ``POST /v1/async-invoke`` request.

    :keyword headers: Optional caller-supplied headers. ``Content-Type`` and ``Accept`` are
     removed from this mapping and written back after passing through ``_SERIALIZER.header``.
    :keyword content_type: Optional body content type. Falls back to the ``Content-Type``
     entry of ``headers``; when neither is present no ``Content-Type`` header is set.
    :return: The request object; all remaining keyword arguments are forwarded to
     ``HttpRequest`` unchanged.
    :rtype: HttpRequest
    """
    header_map = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    # The header entry is always removed, even when the keyword overrides its value.
    header_content_type = header_map.pop("Content-Type", None)
    content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
    accept_value = header_map.pop("Accept", "application/json")

    if content_type is not None:
        header_map["Content-Type"] = _SERIALIZER.header(
            "content_type", content_type, "str"
        )
    header_map["Accept"] = _SERIALIZER.header("accept", accept_value, "str")

    return HttpRequest(
        method="POST", url="/v1/async-invoke", headers=header_map, **kwargs
    )


def build_agent_inference_create_chat_completion_request(  # pylint: disable=name-too-long
    *, agent: bool = True, **kwargs: Any
) -> HttpRequest:
    """Assemble the ``POST /api/v1/chat/completions`` request.

    :keyword agent: Value sent as the ``agent`` query parameter. Default value is True.
    :paramtype agent: bool
    :keyword headers: Optional caller-supplied headers. ``Content-Type`` and ``Accept`` are
     removed from this mapping and written back after passing through ``_SERIALIZER.header``.
    :keyword params: Optional caller-supplied query parameters; the ``agent`` entry is
     overwritten with the serialized ``agent`` keyword.
    :keyword content_type: Optional body content type. Falls back to the ``Content-Type``
     entry of ``headers``; when neither is present no ``Content-Type`` header is set.
    :return: The request object; all remaining keyword arguments are forwarded to
     ``HttpRequest`` unchanged.
    :rtype: HttpRequest
    """
    header_map = case_insensitive_dict(kwargs.pop("headers", {}) or {})
    query_map = case_insensitive_dict(kwargs.pop("params", {}) or {})

    # The header entry is always removed, even when the keyword overrides its value.
    header_content_type = header_map.pop("Content-Type", None)
    content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
    accept_value = header_map.pop("Accept", "application/json")

    query_map["agent"] = _SERIALIZER.query("agent", agent, "bool")

    if content_type is not None:
        header_map["Content-Type"] = _SERIALIZER.header(
            "content_type", content_type, "str"
        )
    header_map["Accept"] = _SERIALIZER.header("accept", accept_value, "str")

    return HttpRequest(
        method="POST",
        url="/api/v1/chat/completions",
        params=query_map,
        headers=header_map,
        **kwargs,
    )
class InferenceOperations:
    """
    .. warning::
        **DO NOT** instantiate this class directly.

        Instead, you should access the following operations through
        :class:`~pydo.GeneratedClient`'s
        :attr:`inference` attribute.
    """

    def __init__(self, *args, **kwargs):
        """Bind the pipeline objects handed over by the generated client.

        The four collaborators are read positionally in the order client, config,
        serializer, deserializer. Any that are not supplied positionally are taken from
        the ``client``, ``config``, ``serializer`` and ``deserializer`` keywords.

        :raises KeyError: If a collaborator is given neither positionally nor by keyword.
        """
        positional = list(args)
        bindings = (
            ("_client", "client"),
            ("_config", "config"),
            ("_serialize", "serializer"),
            ("_deserialize", "deserializer"),
        )
        for attribute, keyword in bindings:
            # Positional values win; the keyword is only consulted once they run out.
            value = positional.pop(0) if positional else kwargs.pop(keyword)
            setattr(self, attribute, value)

    @overload
    def create_chat_completion(
        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
    ) -> JSON:
        # pylint: disable=line-too-long
        """Create a model response for the given chat conversation.

        Typing overload for a request body supplied as a JSON object.

        :param body: Required.
        :type body: JSON
        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
         Default value is "application/json".
        :paramtype content_type: str
        :return: JSON object
        :rtype: JSON
        :raises ~azure.core.exceptions.HttpResponseError:

        Example:
            .. code-block:: python

                # JSON input template you can fill out and use as your body input.
                body = {
                    "messages": [
                        {
                            "role": "str",  # The role of the message author. Required. Known
                              values are: "system", "developer", "user", "assistant", and "tool".
                            "content": "str",  # Optional. The contents of the message.
                            "reasoning_content": "str",  # Optional. The reasoning content
                              generated by the model (assistant messages only).
                            "refusal": "str",  # Optional. The refusal message generated by the
                              model (assistant messages only).
                            "tool_call_id": "str",  # Optional. Tool call that this message is
                              responding to (tool messages only).
                            "tool_calls": [
                                {
                                    "function": {
                                        "arguments": "str",  # The arguments to call the
                                          function with, as generated by the model in JSON
                                          format. Required.
                                        "name": "str"  # The name of the function to call.
                                          Required.
                                    },
                                    "id": "str",  # The ID of the tool call. Required.
                                    "type": "str"  # The type of the tool. Currently, only
                                      function is supported. Required. "function"
                                }
                            ]
                        }
                    ],
                    "model": "str",  # Model ID used to generate the response. Required.
                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
                      and 2.0. Positive values penalize new tokens based on their existing
                      frequency in the text so far, decreasing the model's likelihood to repeat
                      the same line verbatim.
                    "logit_bias": {
                        "str": 0  # Optional. Modify the likelihood of specified tokens appearing
                          in the completion. Accepts a JSON object that maps tokens (specified by
                          their token ID in the tokenizer) to an associated bias value from -100
                          to 100. Mathematically, the bias is added to the logits generated by
                          the model prior to sampling. The exact effect will vary per model, but
                          values between -1 and 1 should decrease or increase likelihood of
                          selection; values like -100 or 100 should result in a ban or exclusive
                          selection of the relevant token.
                    },
                    "logprobs": False,  # Optional. Default value is False. Whether to return log
                      probabilities of the output tokens or not. If true, returns the log
                      probabilities of each output token returned in the content of message.
                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
                      tokens that may be used over the course of the run. The run will make a
                      best effort to use only the number of completion tokens specified, across
                      multiple turns of the run.
                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
                      generated in the completion. The token count of your prompt plus max_tokens
                      cannot exceed the model's context length.
                    "metadata": {
                        "str": "str"  # Optional. Set of 16 key-value pairs that can be attached
                          to an object. This can be useful for storing additional information
                          about the object in a structured format. Keys are strings with a
                          maximum length of 64 characters. Values are strings with a maximum
                          length of 512 characters.
                    },
                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
                      generate for each input message. Note that you will be charged based on the
                      number of generated tokens across all of the choices. Keep n as 1 to
                      minimize costs.
                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
                      and 2.0. Positive values penalize new tokens based on whether they appear
                      in the text so far, increasing the model's likelihood to talk about new
                      topics.
                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
                      reasoning models. Reducing reasoning effort can result in faster responses
                      and fewer tokens used on reasoning in a response. Known values are: "none",
                      "minimal", "low", "medium", "high", and "xhigh".
                    "seed": 0,  # Optional. If specified, the system will make a best effort to
                      sample deterministically, such that repeated requests with the same seed
                      and parameters should return the same result. Determinism is not
                      guaranteed.
                    "stop": {},
                    "stream": False,  # Optional. Default value is False. If set to true, the
                      model response data will be streamed to the client as it is generated using
                      server-sent events.
                    "stream_options": {
                        "include_usage": bool  # Optional. If set, an additional chunk will be
                          streamed before the data [DONE] message. The usage field on this chunk
                          shows the token usage statistics for the entire request, and the
                          choices field will always be an empty array.
                    },
                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
                      and 2. Higher values like 0.8 will make the output more random, while lower
                      values like 0.2 will make it more focused and deterministic. We generally
                      recommend altering this or top_p but not both.
                    "tool_choice": {},
                    "tools": [
                        {
                            "function": {
                                "name": "str",  # The name of the function to be called. Must be
                                  a-z, A-Z, 0-9, or contain underscores and dashes, with a
                                  maximum length of 64. Required.
                                "description": "str",  # Optional. A description of what the
                                  function does, used by the model to choose when and how to call
                                  the function.
                                "parameters": {
                                    "str": {}  # Optional. The parameters the function accepts,
                                      described as a JSON Schema object.
                                }
                            },
                            "type": "str"  # The type of the tool. Currently, only function is
                              supported. Required. "function"
                        }
                    ],
                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
                      number of most likely tokens to return at each token position, each with an
                      associated log probability. logprobs must be set to true if this parameter
                      is used.
                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
                      called nucleus sampling, where the model considers the results of the
                      tokens with top_p probability mass. So 0.1 means only the tokens comprising
                      the top 10% probability mass are considered. We generally recommend
                      altering this or temperature but not both.
                    "user": "str"  # Optional. A unique identifier representing your end-user,
                      which can help DigitalOcean to monitor and detect abuse.
                }

                # response body for status code(s): 200
                response == {
                    "choices": [
                        {
                            "finish_reason": "str",  # The reason the model stopped generating
                              tokens. stop if the model hit a natural stop point or a provided
                              stop sequence, length if the maximum number of tokens specified in
                              the request was reached, tool_calls if the model called a tool.
                              Required. Known values are: "stop", "length", "tool_calls", and
                              "content_filter".
                            "index": 0,  # The index of the choice in the list of choices.
                              Required.
                            "logprobs": {
                                "content": [
                                    {
                                        "bytes": [
                                            0  # A list of integers representing the UTF-8 bytes
                                              representation of the token. Can be null if there
                                              is no bytes representation for the token. Required.
                                        ],
                                        "logprob": 0.0,  # The log probability of this token, if
                                          it is within the top 20 most likely tokens. Otherwise,
                                          the value -9999.0 is used to signify that the token is
                                          very unlikely. Required.
                                        "token": "str",  # The token. Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  # Required.
                                                ],
                                                "logprob": 0.0,  # The log probability of this
                                                  token. Required.
                                                "token": "str"  # The token. Required.
                                            }
                                        ]
                                    }
                                ],
                                "refusal": [
                                    {
                                        "bytes": [
                                            0  # A list of integers representing the UTF-8 bytes
                                              representation of the token. Can be null if there
                                              is no bytes representation for the token. Required.
                                        ],
                                        "logprob": 0.0,  # The log probability of this token, if
                                          it is within the top 20 most likely tokens. Otherwise,
                                          the value -9999.0 is used to signify that the token is
                                          very unlikely. Required.
                                        "token": "str",  # The token. Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  # Required.
                                                ],
                                                "logprob": 0.0,  # The log probability of this
                                                  token. Required.
                                                "token": "str"  # The token. Required.
                                            }
                                        ]
                                    }
                                ]
                            },
                            "message": {
                                "content": "str",  # The contents of the message. Required.
                                "reasoning_content": "str",  # The reasoning content generated by
                                  the model. Required.
                                "refusal": "str",  # The refusal message generated by the model.
                                  Required.
                                "role": "str",  # The role of the author of this message.
                                  Required. "assistant"
                                "tool_calls": [
                                    {
                                        "function": {
                                            "arguments": "str",  # The arguments to call the
                                              function with. Required.
                                            "name": "str"  # The name of the function to call.
                                              Required.
                                        },
                                        "id": "str",  # The ID of the tool call. Required.
                                        "type": "str"  # The type of the tool. Required.
                                          "function"
                                    }
                                ]
                            }
                        }
                    ],
                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
                      was created. Required.
                    "id": "str",  # A unique identifier for the chat completion. Required.
                    "model": "str",  # The model used for the chat completion. Required.
                    "object": "str",  # The object type, which is always chat.completion.
                      Required. "chat.completion"
                    "usage": {
                        "cache_created_input_tokens": 0,  # Default value is 0. Number of prompt
                          tokens written to cache. Required.
                        "cache_creation": {
                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number of
                              prompt tokens written to 1h cache. Required.
                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number of
                              prompt tokens written to 5m cache. Required.
                        },
                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
                          tokens read from cache. Required.
                        "completion_tokens": 0,  # Default value is 0. Number of tokens in the
                          generated completion. Required.
                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
                          prompt. Required.
                        "total_tokens": 0  # Default value is 0. Total number of tokens used in
                          the request (prompt + completion). Required.
                    }
                }
        """

    @overload
    def create_chat_completion(
        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
    ) -> JSON:
        # pylint: disable=line-too-long
        """Create a model response for the given chat conversation.

        Typing overload for a request body supplied as a binary stream.

        :param body: Required.
        :type body: IO[bytes]
        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
         Default value is "application/json".
        :paramtype content_type: str
        :return: JSON object
        :rtype: JSON
        :raises ~azure.core.exceptions.HttpResponseError:

        Example:
            .. code-block:: python

                # response body for status code(s): 200
                response == {
                    "choices": [
                        {
                            "finish_reason": "str",  # The reason the model stopped generating
                              tokens. stop if the model hit a natural stop point or a provided
                              stop sequence, length if the maximum number of tokens specified in
                              the request was reached, tool_calls if the model called a tool.
                              Required. Known values are: "stop", "length", "tool_calls", and
                              "content_filter".
                            "index": 0,  # The index of the choice in the list of choices.
                              Required.
                            "logprobs": {
                                "content": [
                                    {
                                        "bytes": [
                                            0  # A list of integers representing the UTF-8 bytes
                                              representation of the token. Can be null if there
                                              is no bytes representation for the token. Required.
                                        ],
                                        "logprob": 0.0,  # The log probability of this token, if
                                          it is within the top 20 most likely tokens. Otherwise,
                                          the value -9999.0 is used to signify that the token is
                                          very unlikely. Required.
                                        "token": "str",  # The token. Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  # Required.
                                                ],
                                                "logprob": 0.0,  # The log probability of this
                                                  token. Required.
                                                "token": "str"  # The token. Required.
                                            }
                                        ]
                                    }
                                ],
                                "refusal": [
                                    {
                                        "bytes": [
                                            0  # A list of integers representing the UTF-8 bytes
                                              representation of the token. Can be null if there
                                              is no bytes representation for the token. Required.
                                        ],
                                        "logprob": 0.0,  # The log probability of this token, if
                                          it is within the top 20 most likely tokens. Otherwise,
                                          the value -9999.0 is used to signify that the token is
                                          very unlikely. Required.
                                        "token": "str",  # The token. Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  # Required.
                                                ],
                                                "logprob": 0.0,  # The log probability of this
                                                  token. Required.
                                                "token": "str"  # The token. Required.
                                            }
                                        ]
                                    }
                                ]
                            },
                            "message": {
                                "content": "str",  # The contents of the message. Required.
                                "reasoning_content": "str",  # The reasoning content generated by
                                  the model. Required.
                                "refusal": "str",  # The refusal message generated by the model.
                                  Required.
                                "role": "str",  # The role of the author of this message.
                                  Required. "assistant"
                                "tool_calls": [
                                    {
                                        "function": {
                                            "arguments": "str",  # The arguments to call the
                                              function with. Required.
                                            "name": "str"  # The name of the function to call.
                                              Required.
                                        },
                                        "id": "str",  # The ID of the tool call. Required.
                                        "type": "str"  # The type of the tool. Required.
                                          "function"
                                    }
                                ]
                            }
                        }
                    ],
                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
                      was created. Required.
                    "id": "str",  # A unique identifier for the chat completion. Required.
                    "model": "str",  # The model used for the chat completion. Required.
                    "object": "str",  # The object type, which is always chat.completion.
                      Required. "chat.completion"
                    "usage": {
                        "cache_created_input_tokens": 0,  # Default value is 0. Number of prompt
                          tokens written to cache. Required.
                        "cache_creation": {
                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number of
                              prompt tokens written to 1h cache. Required.
                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number of
                              prompt tokens written to 5m cache. Required.
                        },
                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
                          tokens read from cache. Required.
                        "completion_tokens": 0,  # Default value is 0. Number of tokens in the
                          generated completion. Required.
                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
                          prompt. Required.
                        "total_tokens": 0  # Default value is 0. Total number of tokens used in
                          the request (prompt + completion). Required.
                    }
                }
        """
code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "messages": [ + { + "role": "str", # The role of the message author. Required. + Known values are: "system", "developer", "user", "assistant", and "tool". + "content": "str", # Optional. The contents of the message. + "reasoning_content": "str", # Optional. The reasoning + content generated by the model (assistant messages only). + "refusal": "str", # Optional. The refusal message generated + by the model (assistant messages only). + "tool_call_id": "str", # Optional. Tool call that this + message is responding to (tool messages only). + "tool_calls": [ + { + "function": { + "arguments": "str", # The arguments + to call the function with, as generated by the model in JSON + format. Required. + "name": "str" # The name of the + function to call. Required. + }, + "id": "str", # The ID of the tool call. + Required. + "type": "str" # The type of the tool. + Currently, only function is supported. Required. "function" + } + ] + } + ], + "model": "str", # Model ID used to generate the response. Required. + "frequency_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on their existing frequency in + the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + "logit_bias": { + "str": 0 # Optional. Modify the likelihood of specified tokens + appearing in the completion. Accepts a JSON object that maps tokens + (specified by their token ID in the tokenizer) to an associated bias value + from -100 to 100. Mathematically, the bias is added to the logits generated + by the model prior to sampling. The exact effect will vary per model, but + values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + }, + "logprobs": False, # Optional. Default value is False. 
Whether to return log + probabilities of the output tokens or not. If true, returns the log probabilities + of each output token returned in the content of message. + "max_completion_tokens": 0, # Optional. The maximum number of completion + tokens that may be used over the course of the run. The run will make a best + effort to use only the number of completion tokens specified, across multiple + turns of the run. + "max_tokens": 0, # Optional. The maximum number of tokens that can be + generated in the completion. The token count of your prompt plus max_tokens + cannot exceed the model's context length. + "metadata": { + "str": "str" # Optional. Set of 16 key-value pairs that can be + attached to an object. This can be useful for storing additional information + about the object in a structured format. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters. + }, + "n": 1, # Optional. Default value is 1. How many chat completion choices to + generate for each input message. Note that you will be charged based on the + number of generated tokens across all of the choices. Keep n as 1 to minimize + costs. + "presence_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + "reasoning_effort": "str", # Optional. Constrains effort on reasoning for + reasoning models. Reducing reasoning effort can result in faster responses and + fewer tokens used on reasoning in a response. Known values are: "none", + "minimal", "low", "medium", "high", and "xhigh". + "seed": 0, # Optional. If specified, the system will make a best effort to + sample deterministically, such that repeated requests with the same seed and + parameters should return the same result. Determinism is not guaranteed. + "stop": {}, + "stream": False, # Optional. 
Default value is False. If set to true, the + model response data will be streamed to the client as it is generated using + server-sent events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data [DONE] message. The usage field on this chunk + shows the token usage statistics for the entire request, and the choices + field will always be an empty array. + }, + "temperature": 0.0, # Optional. What sampling temperature to use, between 0 + and 2. Higher values like 0.8 will make the output more random, while lower + values like 0.2 will make it more focused and deterministic. We generally + recommend altering this or top_p but not both. + "tool_choice": {}, + "tools": [ + { + "function": { + "name": "str", # The name of the function to be + called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. Required. + "description": "str", # Optional. A description of + what the function does, used by the model to choose when and how to + call the function. + "parameters": { + "str": {} # Optional. The parameters the + function accepts, described as a JSON Schema object. + } + }, + "type": "str" # The type of the tool. Currently, only + function is supported. Required. "function" + } + ], + "top_logprobs": 0, # Optional. An integer between 0 and 20 specifying the + number of most likely tokens to return at each token position, each with an + associated log probability. logprobs must be set to true if this parameter is + used. + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. We generally recommend altering this or + temperature but not both. + "user": "str" # Optional. 
A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. 
+ "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. "assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. + "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. Total number of tokens used + in the request (prompt + completion). Required. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_chat_completion_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + 
return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + def create_image( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Generate images from text prompts. + + Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation + model with automatic prompt optimization and enhanced visual capabilities. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "model": "str", # The model to use for image generation. Required. + "n": 0, # The number of images to generate. Must be between 1 and 10. + Required. + "prompt": "str", # A text description of the desired image(s). Supports up + to 32,000 characters and provides automatic prompt optimization for best results. + Required. + "background": "str", # Optional. The background setting for the image + generation. Supported values: transparent, opaque, auto. + "moderation": "str", # Optional. The moderation setting for the image + generation. Supported values: low, auto. + "output_compression": 0, # Optional. The output compression level for the + image generation (0-100). + "output_format": "str", # Optional. The output format for the image + generation. Supported values: png, webp, jpeg. + "partial_images": 0, # Optional. The number of partial image chunks to + return during streaming generation. Defaults to 0. When stream=true, this must be + greater than 0 to receive progressive updates of the image as it is being + generated. 
+ "quality": "str", # Optional. The quality of the image that will be + generated. Supported values: auto, high, medium, low. + "size": "str", # Optional. The size of the generated images. GPT-IMAGE-1 + supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536 + (portrait). Known values are: "auto", "1536x1024", and "1024x1536". + "stream": False, # Optional. Default value is False. If set to true, partial + image data will be streamed as the image is being generated. The response will be + sent as server-sent events with partial image chunks. When stream is true, + partial_images must be greater than 0. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the images were + created. Required. + "data": [ + { + "b64_json": "str", # The base64-encoded JSON of the + generated image. Required. + "revised_prompt": "str" # Optional. The optimized prompt + that was used to generate the image. + } + ], + "background": "str", # Optional. The background setting used for the image + generation. + "output_format": "str", # Optional. The output format of the generated + image. + "quality": "str", # Optional. The quality setting used for the image + generation. + "size": "str", # Optional. The size of the generated image. + "usage": { + "input_tokens": 0, # The number of tokens (images and text) in the + input prompt. Required. + "input_tokens_details": { + "image_tokens": 0, # The number of image tokens in the input + prompt. Required. + "text_tokens": 0 # The number of text tokens in the input + prompt. Required. + }, + "output_tokens": 0, # The number of image tokens in the output + image. Required. + "total_tokens": 0 # The total number of tokens (images and text) + used for the image generation. Required. 
+ } + } + """ + + @overload + def create_image( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + """Generate images from text prompts. + + Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation + model with automatic prompt optimization and enhanced visual capabilities. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the images were + created. Required. + "data": [ + { + "b64_json": "str", # The base64-encoded JSON of the + generated image. Required. + "revised_prompt": "str" # Optional. The optimized prompt + that was used to generate the image. + } + ], + "background": "str", # Optional. The background setting used for the image + generation. + "output_format": "str", # Optional. The output format of the generated + image. + "quality": "str", # Optional. The quality setting used for the image + generation. + "size": "str", # Optional. The size of the generated image. + "usage": { + "input_tokens": 0, # The number of tokens (images and text) in the + input prompt. Required. + "input_tokens_details": { + "image_tokens": 0, # The number of image tokens in the input + prompt. Required. + "text_tokens": 0 # The number of text tokens in the input + prompt. Required. + }, + "output_tokens": 0, # The number of image tokens in the output + image. Required. + "total_tokens": 0 # The total number of tokens (images and text) + used for the image generation. Required. 
+ } + } + """ + + @distributed_trace + def create_image(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Generate images from text prompts. + + Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation + model with automatic prompt optimization and enhanced visual capabilities. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "model": "str", # The model to use for image generation. Required. + "n": 0, # The number of images to generate. Must be between 1 and 10. + Required. + "prompt": "str", # A text description of the desired image(s). Supports up + to 32,000 characters and provides automatic prompt optimization for best results. + Required. + "background": "str", # Optional. The background setting for the image + generation. Supported values: transparent, opaque, auto. + "moderation": "str", # Optional. The moderation setting for the image + generation. Supported values: low, auto. + "output_compression": 0, # Optional. The output compression level for the + image generation (0-100). + "output_format": "str", # Optional. The output format for the image + generation. Supported values: png, webp, jpeg. + "partial_images": 0, # Optional. The number of partial image chunks to + return during streaming generation. Defaults to 0. When stream=true, this must be + greater than 0 to receive progressive updates of the image as it is being + generated. + "quality": "str", # Optional. The quality of the image that will be + generated. Supported values: auto, high, medium, low. + "size": "str", # Optional. The size of the generated images. 
GPT-IMAGE-1 + supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536 + (portrait). Known values are: "auto", "1536x1024", and "1024x1536". + "stream": False, # Optional. Default value is False. If set to true, partial + image data will be streamed as the image is being generated. The response will be + sent as server-sent events with partial image chunks. When stream is true, + partial_images must be greater than 0. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the images were + created. Required. + "data": [ + { + "b64_json": "str", # The base64-encoded JSON of the + generated image. Required. + "revised_prompt": "str" # Optional. The optimized prompt + that was used to generate the image. + } + ], + "background": "str", # Optional. The background setting used for the image + generation. + "output_format": "str", # Optional. The output format of the generated + image. + "quality": "str", # Optional. The quality setting used for the image + generation. + "size": "str", # Optional. The size of the generated image. + "usage": { + "input_tokens": 0, # The number of tokens (images and text) in the + input prompt. Required. + "input_tokens_details": { + "image_tokens": 0, # The number of image tokens in the input + prompt. Required. + "text_tokens": 0 # The number of text tokens in the input + prompt. Required. + }, + "output_tokens": 0, # The number of image tokens in the output + image. Required. + "total_tokens": 0 # The total number of tokens (images and text) + used for the image generation. Required. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_image_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return 
cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def list_models(self, **kwargs: Any) -> JSON: + """List available models. + + Lists the currently available models, and provides basic information about each one such as the + owner and availability. + + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "data": [ + { + "created": 0, # The Unix timestamp (in seconds) when the + model was created. Required. + "id": "str", # The model identifier, which can be referenced + in the API endpoints. Required. + "object": "str", # The object type, which is always "model". + Required. "model" + "owned_by": "str" # The organization that owns the model. + Required. + } + ], + "object": "str" # The object type, which is always "list". Required. "list" + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_inference_list_models_request( + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the 
socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + def create_response( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Send Prompt to a Model Using the Responses API. + + Generate text responses from text prompts. This endpoint supports both streaming and + non-streaming responses for supported text models. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "input": {}, + "model": "str", # The model ID of the model you want to use. Get the model + ID using ``/v1/models`` or on the available models page. Required. + "instructions": "str", # Optional. System-level instructions for the model. + This sets the behavior and context for the response generation. + "max_output_tokens": 0, # Optional. The maximum number of tokens to generate + in the response. + "metadata": { + "str": "str" # Optional. 
Set of key-value pairs that can be attached + to the request. + }, + "stop": {}, + "stream": False, # Optional. Default value is False. Set to true to stream + partial responses as Server-Sent Events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data: [DONE] message with token usage statistics for + the entire request. + }, + "temperature": 0.0, # Optional. A value between 0.0 and 2.0 to control + randomness and creativity. Lower values like 0.2 make the output more focused and + deterministic, while higher values like 0.8 make it more random. + "tool_choice": {}, + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function to be + called. + "parameters": { + "str": {} # Optional. The parameters the function + accepts, described as a JSON Schema object. + } + } + ], + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. + "user": "str" # Optional. A unique identifier representing your end-user. + } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the response was + created. Required. + "id": "str", # A unique identifier for the response. Required. + "model": "str", # The model used to generate the response. Required. + "object": "str", # The object type, which is always ``response``. Required. + "response" + "output": [ + { + "content": [ + { + "text": "str", # The text content. Required. + "type": "str" # The type of content part. + ``reasoning_text`` for reasoning content, ``output_text`` for + final output text. Required. Known values are: "reasoning_text" + and "output_text". + } + ], + "type": "str", # The type of output item. 
One of + ``reasoning``, ``message``, or ``function_call``. Required. Known + values are: "reasoning", "message", and "function_call". + "arguments": "str", # Optional. JSON string of function + arguments (present when type is ``function_call``). + "call_id": "str", # Optional. The unique ID of the function + tool call (present when type is ``function_call``). + "id": "str", # Optional. The unique ID of the output item. + "name": "str", # Optional. The name of the function to call + (present when type is ``function_call``). + "role": "str", # Optional. The role associated with this + output item (typically ``assistant``). + "status": "str" # Optional. Status of the item. + } + ], + "usage": { + "input_tokens": 0, # The number of input tokens. Required. + "input_tokens_details": { + "cached_tokens": 0 # The number of tokens that were + retrieved from the cache. Required. + }, + "output_tokens": 0, # The number of output tokens. Required. + "output_tokens_details": { + "reasoning_tokens": 0, # The number of reasoning tokens. + Required. + "tool_output_tokens": 0 # The number of tool output tokens. + Required. + }, + "total_tokens": 0 # The total number of tokens used. Required. + }, + "max_output_tokens": 0, # Optional. Maximum output tokens setting. + "parallel_tool_calls": bool, # Optional. Whether parallel tool calls are + enabled. + "status": "str", # Optional. Status of the response. + "temperature": 0.0, # Optional. Temperature setting used for the response. + "tool_choice": "str", # Optional. Tool choice setting used for the response. + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function. + "parameters": { + "str": {} # Optional. The parameters the function + accepts. + } + } + ], + "top_p": 0.0, # Optional. Top-p setting used for the response. + "user": "str" # Optional. User identifier.
+ } + """ + + @overload + def create_response( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Send Prompt to a Model Using the Responses API. + + Generate text responses from text prompts. This endpoint supports both streaming and + non-streaming responses for supported text models. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the response was + created. Required. + "id": "str", # A unique identifier for the response. Required. + "model": "str", # The model used to generate the response. Required. + "object": "str", # The object type, which is always ``response``. Required. + "response" + "output": [ + { + "content": [ + { + "text": "str", # The text content. Required. + "type": "str" # The type of content part. + ``reasoning_text`` for reasoning content, ``output_text`` for + final output text. Required. Known values are: "reasoning_text" + and "output_text". + } + ], + "type": "str", # The type of output item. One of + ``reasoning``, ``message``, or ``function_call``. Required. Known + values are: "reasoning", "message", and "function_call". + "arguments": "str", # Optional. JSON string of function + arguments (present when type is ``function_call``). + "call_id": "str", # Optional. The unique ID of the function + tool call (present when type is ``function_call``). + "id": "str", # Optional. The unique ID of the output item. + "name": "str", # Optional. The name of the function to call + (present when type is ``function_call``).
+ "role": "str", # Optional. The role associated with this + output item (typically ``assistant``). + "status": "str" # Optional. Status of the item. + } + ], + "usage": { + "input_tokens": 0, # The number of input tokens. Required. + "input_tokens_details": { + "cached_tokens": 0 # The number of tokens that were + retrieved from the cache. Required. + }, + "output_tokens": 0, # The number of output tokens. Required. + "output_tokens_details": { + "reasoning_tokens": 0, # The number of reasoning tokens. + Required. + "tool_output_tokens": 0 # The number of tool output tokens. + Required. + }, + "total_tokens": 0 # The total number of tokens used. Required. + }, + "max_output_tokens": 0, # Optional. Maximum output tokens setting. + "parallel_tool_calls": bool, # Optional. Whether parallel tool calls are + enabled. + "status": "str", # Optional. Status of the response. + "temperature": 0.0, # Optional. Temperature setting used for the response. + "tool_choice": "str", # Optional. Tool choice setting used for the response. + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function. + "parameters": { + "str": {} # Optional. The parameters the function + accepts. + } + } + ], + "top_p": 0.0, # Optional. Top-p setting used for the response. + "user": "str" # Optional. User identifier. + } + """ + + @distributed_trace + def create_response(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Send Prompt to a Model Using the Responses API. + + Generate text responses from text prompts. This endpoint supports both streaming and + non-streaming responses for supported text models. + + :param body: Is either a JSON type or a IO[bytes] type. Required.
+ :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "input": {}, + "model": "str", # The model ID of the model you want to use. Get the model + ID using ``/v1/models`` or on the available models page. Required. + "instructions": "str", # Optional. System-level instructions for the model. + This sets the behavior and context for the response generation. + "max_output_tokens": 0, # Optional. The maximum number of tokens to generate + in the response. + "metadata": { + "str": "str" # Optional. Set of key-value pairs that can be attached + to the request. + }, + "stop": {}, + "stream": False, # Optional. Default value is False. Set to true to stream + partial responses as Server-Sent Events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data: [DONE] message with token usage statistics for + the entire request. + }, + "temperature": 0.0, # Optional. A value between 0.0 and 2.0 to control + randomness and creativity. Lower values like 0.2 make the output more focused and + deterministic, while higher values like 0.8 make it more random. + "tool_choice": {}, + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function to be + called. + "parameters": { + "str": {} # Optional. The parameters the function + accepts, described as a JSON Schema object. + } + } + ], + "top_p": 0.0, # Optional. An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. + "user": "str" # Optional. A unique identifier representing your end-user. 
+ } + + # response body for status code(s): 200 + response == { + "created": 0, # The Unix timestamp (in seconds) of when the response was + created. Required. + "id": "str", # A unique identifier for the response. Required. + "model": "str", # The model used to generate the response. Required. + "object": "str", # The object type, which is always ``response``. Required. + "response" + "output": [ + { + "content": [ + { + "text": "str", # The text content. Required. + "type": "str" # The type of content part. + ``reasoning_text`` for reasoning content, ``output_text`` for + final output text. Required. Known values are: "reasoning_text" + and "output_text". + } + ], + "type": "str", # The type of output item. One of + ``reasoning``, ``message``, or ``function_call``. Required. Known + values are: "reasoning", "message", and "function_call". + "arguments": "str", # Optional. JSON string of function + arguments (present when type is ``function_call``). + "call_id": "str", # Optional. The unique ID of the function + tool call (present when type is ``function_call``). + "id": "str", # Optional. The unique ID of the output item. + "name": "str", # Optional. The name of the function to call + (present when type is ``function_call``). + "role": "str", # Optional. The role associated with this + output item (typically ``assistant``). + "status": "str" # Optional. Status of the item. + } + ], + "usage": { + "input_tokens": 0, # The number of input tokens. Required. + "input_tokens_details": { + "cached_tokens": 0 # The number of tokens that were + retrieved from the cache. Required. + }, + "output_tokens": 0, # The number of output tokens. Required. + "output_tokens_details": { + "reasoning_tokens": 0, # The number of reasoning tokens. + Required. + "tool_output_tokens": 0 # The number of tool output tokens. + Required. + }, + "total_tokens": 0 # The total number of tokens used. Required. + }, + "max_output_tokens": 0, # Optional.
Maximum output tokens setting. + "parallel_tool_calls": bool, # Optional. Whether parallel tool calls are + enabled. + "status": "str", # Optional. Status of the response. + "temperature": 0.0, # Optional. Temperature setting used for the response. + "tool_choice": "str", # Optional. Tool choice setting used for the response. + "tools": [ + { + "type": "str", # The type of the tool. Required. "function" + "description": "str", # Optional. A description of what the + function does. + "name": "str", # Optional. The name of the function. + "parameters": { + "str": {} # Optional. The parameters the function + accepts. + } + } + ], + "top_p": 0.0, # Optional. Top-p setting used for the response. + "user": "str" # Optional. User identifier. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_response_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + 
    @overload
    def create_async_invoke(
        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
    ) -> JSON:
        # pylint: disable=line-too-long
        # Typing-only overload: describes the call shape for a JSON (dict-like) body.
        # The runtime behaviour lives in the @distributed_trace implementation of the
        # same name, which forwards a non-binary body as the request's JSON payload.
        """Generate Image, Audio, or Text-to-Speech Using fal Models.

        Generate Image, Audio, or Text-to-Speech Using fal Models. This endpoint starts an asynchronous
        job and returns a request_id. The job status is QUEUED initially. Use the request_id to poll
        for the result.

        :param body: The invocation request as a JSON object (see the template below). Required.
        :type body: JSON
        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
         Default value is "application/json".
        :paramtype content_type: str
        :return: JSON object describing the accepted job (see the 202 template below).
        :rtype: JSON
        :raises ~azure.core.exceptions.HttpResponseError: If the service answers with any status
         code other than 202.

        Example:
            .. code-block:: python

                # JSON input template you can fill out and use as your body input.
                body = {
                    "input": {
                        "enable_safety_checker": bool,  # Optional. Whether to enable the
                          safety checker for generated content.
                        "guidance_scale": 0.0,  # Optional. Controls how closely the image
                          generation model follows the prompt. Higher values produce output more
                          closely matching the prompt.
                        "num_images": 0,  # Optional. The number of images to generate.
                        "num_inference_steps": 0,  # Optional. The number of inference steps
                          to use during image generation. More steps generally produce higher quality
                          output but take longer.
                        "output_format": "str",  # Optional. The desired output format or
                          aspect ratio for image generation.
                        "prompt": "str",  # Optional. The text prompt describing the desired
                          output. Used for image generation and audio generation models.
                        "seconds_total": 0,  # Optional. The total duration in seconds for
                          generated audio. Used for audio generation models.
                        "text": "str"  # Optional. The text content to convert to speech.
                          Used for text-to-speech models.
                    },
                    "model_id": "str",  # The ID of the model to invoke asynchronously. Required.
                    "tags": [
                        {
                            "key": "str",  # The tag key. Required.
                            "value": "str"  # The tag value. Required.
                        }
                    ]
                }

                # response body for status code(s): 202
                response == {
                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
                      created. Required.
                    "model_id": "str",  # The model ID that was invoked. Required.
                    "request_id": "str",  # A unique identifier for the async invocation request.
                      Use this ID to check the status and retrieve the result. Required.
                    "status": "str",  # The current status of the async invocation. Required.
                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
                      job completed. Null until finished.
                    "error": "str",  # Optional. Error message if the job failed. Null on
                      success.
                    "output": {
                        "str": {}  # Optional. The output of the invocation. Null while the
                          job is queued or in progress. Contains the result once completed.
                    },
                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
                      started processing. Null while queued.
                }
        """
    @overload
    def create_async_invoke(
        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
    ) -> JSON:
        # pylint: disable=line-too-long
        # Typing-only overload: describes the call shape for a pre-serialized binary body.
        # The runtime behaviour lives in the @distributed_trace implementation of the
        # same name, which passes binary bodies through as raw request content.
        """Generate Image, Audio, or Text-to-Speech Using fal Models.

        Generate Image, Audio, or Text-to-Speech Using fal Models. This endpoint starts an asynchronous
        job and returns a request_id. The job status is QUEUED initially. Use the request_id to poll
        for the result.

        :param body: The already-encoded request payload as a binary stream. Required.
        :type body: IO[bytes]
        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
         Default value is "application/json".
        :paramtype content_type: str
        :return: JSON object describing the accepted job (see the 202 template below).
        :rtype: JSON
        :raises ~azure.core.exceptions.HttpResponseError: If the service answers with any status
         code other than 202.

        Example:
            .. code-block:: python

                # response body for status code(s): 202
                response == {
                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
                      created. Required.
                    "model_id": "str",  # The model ID that was invoked. Required.
                    "request_id": "str",  # A unique identifier for the async invocation request.
                      Use this ID to check the status and retrieve the result. Required.
                    "status": "str",  # The current status of the async invocation. Required.
                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
                      job completed. Null until finished.
                    "error": "str",  # Optional. Error message if the job failed. Null on
                      success.
                    "output": {
                        "str": {}  # Optional. The output of the invocation. Null while the
                          job is queued or in progress. Contains the result once completed.
                    },
                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
                      started processing. Null while queued.
                }
        """
+ } + """ + + @distributed_trace + def create_async_invoke(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Generate Image, Audio, or Text-to-Speech Using fal Models. + + Generate Image, Audio, or Text-to-Speech Using fal Models. This endpoint starts an asynchronous + job and returns a request_id. The job status is QUEUED initially. Use the request_id to poll + for the result. + + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "input": { + "enable_safety_checker": bool, # Optional. Whether to enable the + safety checker for generated content. + "guidance_scale": 0.0, # Optional. Controls how closely the image + generation model follows the prompt. Higher values produce output more + closely matching the prompt. + "num_images": 0, # Optional. The number of images to generate. + "num_inference_steps": 0, # Optional. The number of inference steps + to use during image generation. More steps generally produce higher quality + output but take longer. + "output_format": "str", # Optional. The desired output format or + aspect ratio for image generation. + "prompt": "str", # Optional. The text prompt describing the desired + output. Used for image generation and audio generation models. + "seconds_total": 0, # Optional. The total duration in seconds for + generated audio. Used for audio generation models. + "text": "str" # Optional. The text content to convert to speech. + Used for text-to-speech models. + }, + "model_id": "str", # The ID of the model to invoke asynchronously. Required. + "tags": [ + { + "key": "str", # The tag key. Required. + "value": "str" # The tag value. Required. 
+ } + ] + } + + # response body for status code(s): 202 + response == { + "created_at": "2020-02-20 00:00:00", # The timestamp when the request was + created. Required. + "model_id": "str", # The model ID that was invoked. Required. + "request_id": "str", # A unique identifier for the async invocation request. + Use this ID to check the status and retrieve the result. Required. + "status": "str", # The current status of the async invocation. Required. + Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED". + "completed_at": "2020-02-20 00:00:00", # Optional. The timestamp when the + job completed. Null until finished. + "error": "str", # Optional. Error message if the job failed. Null on + success. + "output": { + "str": {} # Optional. The output of the invocation. Null while the + job is queued or in progress. Contains the result once completed. + }, + "started_at": "2020-02-20 00:00:00" # Optional. The timestamp when the job + started processing. Null while queued. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_inference_create_async_invoke_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = 
self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + +class AgentInferenceOperations: + """ + .. warning:: + **DO NOT** instantiate this class directly. + + Instead, you should access the following operations through + :class:`~pydo.GeneratedClient`'s + :attr:`agent_inference` attribute. 
+ """ + + def __init__(self, *args, **kwargs): + input_args = list(args) + self._client = input_args.pop(0) if input_args else kwargs.pop("client") + self._config = input_args.pop(0) if input_args else kwargs.pop("config") + self._serialize = input_args.pop(0) if input_args else kwargs.pop("serializer") + self._deserialize = ( + input_args.pop(0) if input_args else kwargs.pop("deserializer") + ) + + @overload + def create_chat_completion( + self, + body: JSON, + *, + agent: bool = True, + content_type: str = "application/json", + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """Create a model response for the given chat conversation. + + Creates a model response for the given chat conversation via a customer-provisioned + agent endpoint. + + :param body: Required. + :type body: JSON + :keyword agent: Must be set to true for agent-based completion behavior. Default value is True. + :paramtype agent: bool + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "messages": [ + { + "role": "str", # The role of the message author. Required. + Known values are: "system", "developer", "user", "assistant", and "tool". + "content": "str", # Optional. The contents of the message. + "reasoning_content": "str", # Optional. The reasoning + content generated by the model (assistant messages only). + "refusal": "str", # Optional. The refusal message generated + by the model (assistant messages only). + "tool_call_id": "str", # Optional. Tool call that this + message is responding to (tool messages only). + "tool_calls": [ + { + "function": { + "arguments": "str", # The arguments + to call the function with, as generated by the model in JSON + format. 
    @overload
    def create_chat_completion(
        self,
        body: JSON,
        *,
        agent: bool = True,
        content_type: str = "application/json",
        **kwargs: Any,
    ) -> JSON:
        # pylint: disable=line-too-long
        # Typing-only overload: describes the call shape for a JSON (dict-like) body.
        # The runtime behaviour lives in the @distributed_trace implementation of the
        # same name.
        # NOTE(review): that implementation runs the pipeline with stream=False; how a
        # server-sent-event reply (requested via "stream": True in the body) is surfaced
        # to the caller is not visible here - confirm before relying on streaming.
        """Create a model response for the given chat conversation.

        Creates a model response for the given chat conversation via a customer-provisioned
        agent endpoint.

        :param body: The chat completion request as a JSON object (see the template below).
         Required.
        :type body: JSON
        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
        :paramtype agent: bool
        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
         Default value is "application/json".
        :paramtype content_type: str
        :return: JSON object
        :rtype: JSON
        :raises ~azure.core.exceptions.HttpResponseError: If the service answers with any status
         code other than 200.

        Example:
            .. code-block:: python

                # JSON input template you can fill out and use as your body input.
                body = {
                    "messages": [
                        {
                            "role": "str",  # The role of the message author. Required.
                              Known values are: "system", "developer", "user", "assistant", and "tool".
                            "content": "str",  # Optional. The contents of the message.
                            "reasoning_content": "str",  # Optional. The reasoning
                              content generated by the model (assistant messages only).
                            "refusal": "str",  # Optional. The refusal message generated
                              by the model (assistant messages only).
                            "tool_call_id": "str",  # Optional. Tool call that this
                              message is responding to (tool messages only).
                            "tool_calls": [
                                {
                                    "function": {
                                        "arguments": "str",  # The arguments
                                          to call the function with, as generated by the model in JSON
                                          format. Required.
                                        "name": "str"  # The name of the
                                          function to call. Required.
                                    },
                                    "id": "str",  # The ID of the tool call.
                                      Required.
                                    "type": "str"  # The type of the tool.
                                      Currently, only function is supported. Required. "function"
                                }
                            ]
                        }
                    ],
                    "model": "str",  # Model ID used to generate the response. Required.
                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
                      and 2.0. Positive values penalize new tokens based on their existing frequency in
                      the text so far, decreasing the model's likelihood to repeat the same line
                      verbatim.
                    "logit_bias": {
                        "str": 0  # Optional. Modify the likelihood of specified tokens
                          appearing in the completion. Accepts a JSON object that maps tokens
                          (specified by their token ID in the tokenizer) to an associated bias value
                          from -100 to 100. Mathematically, the bias is added to the logits generated
                          by the model prior to sampling. The exact effect will vary per model, but
                          values between -1 and 1 should decrease or increase likelihood of selection;
                          values like -100 or 100 should result in a ban or exclusive selection of the
                          relevant token.
                    },
                    "logprobs": False,  # Optional. Default value is False. Whether to return log
                      probabilities of the output tokens or not. If true, returns the log probabilities
                      of each output token returned in the content of message.
                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
                      tokens that may be used over the course of the run. The run will make a best
                      effort to use only the number of completion tokens specified, across multiple
                      turns of the run.
                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
                      generated in the completion. The token count of your prompt plus max_tokens
                      cannot exceed the model's context length.
                    "metadata": {
                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
                          attached to an object. This can be useful for storing additional information
                          about the object in a structured format. Keys are strings with a maximum
                          length of 64 characters. Values are strings with a maximum length of 512
                          characters.
                    },
                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
                      generate for each input message. Note that you will be charged based on the
                      number of generated tokens across all of the choices. Keep n as 1 to minimize
                      costs.
                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
                      and 2.0. Positive values penalize new tokens based on whether they appear in the
                      text so far, increasing the model's likelihood to talk about new topics.
                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
                      reasoning models. Reducing reasoning effort can result in faster responses and
                      fewer tokens used on reasoning in a response. Known values are: "none",
                      "minimal", "low", "medium", "high", and "xhigh".
                    "seed": 0,  # Optional. If specified, the system will make a best effort to
                      sample deterministically, such that repeated requests with the same seed and
                      parameters should return the same result. Determinism is not guaranteed.
                    "stop": {},
                    "stream": False,  # Optional. Default value is False. If set to true, the
                      model response data will be streamed to the client as it is generated using
                      server-sent events.
                    "stream_options": {
                        "include_usage": bool  # Optional. If set, an additional chunk will
                          be streamed before the data [DONE] message. The usage field on this chunk
                          shows the token usage statistics for the entire request, and the choices
                          field will always be an empty array.
                    },
                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
                      and 2. Higher values like 0.8 will make the output more random, while lower
                      values like 0.2 will make it more focused and deterministic. We generally
                      recommend altering this or top_p but not both.
                    "tool_choice": {},
                    "tools": [
                        {
                            "function": {
                                "name": "str",  # The name of the function to be
                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
                                  with a maximum length of 64. Required.
                                "description": "str",  # Optional. A description of
                                  what the function does, used by the model to choose when and how to
                                  call the function.
                                "parameters": {
                                    "str": {}  # Optional. The parameters the
                                      function accepts, described as a JSON Schema object.
                                }
                            },
                            "type": "str"  # The type of the tool. Currently, only
                              function is supported. Required. "function"
                        }
                    ],
                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
                      number of most likely tokens to return at each token position, each with an
                      associated log probability. logprobs must be set to true if this parameter is
                      used.
                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
                      called nucleus sampling, where the model considers the results of the tokens with
                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
                      probability mass are considered. We generally recommend altering this or
                      temperature but not both.
                    "user": "str"  # Optional. A unique identifier representing your end-user,
                      which can help DigitalOcean to monitor and detect abuse.
                }

                # response body for status code(s): 200
                response == {
                    "choices": [
                        {
                            "finish_reason": "str",  # The reason the model stopped
                              generating tokens. stop if the model hit a natural stop point or a
                              provided stop sequence, length if the maximum number of tokens specified
                              in the request was reached, tool_calls if the model called a tool.
                              Required. Known values are: "stop", "length", "tool_calls", and
                              "content_filter".
                            "index": 0,  # The index of the choice in the list of
                              choices. Required.
                            "logprobs": {
                                "content": [
                                    {
                                        "bytes": [
                                            0  # A list of integers
                                              representing the UTF-8 bytes representation of the token.
                                              Can be null if there is no bytes representation for the
                                              token. Required.
                                        ],
                                        "logprob": 0.0,  # The log
                                          probability of this token, if it is within the top 20 most
                                          likely tokens. Otherwise, the value -9999.0 is used to
                                          signify that the token is very unlikely. Required.
                                        "token": "str",  # The token.
                                          Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  #
                                                      Required.
                                                ],
                                                "logprob": 0.0,  #
                                                  The log probability of this token. Required.
                                                "token": "str"  # The
                                                  token. Required.
                                            }
                                        ]
                                    }
                                ],
                                "refusal": [
                                    {
                                        "bytes": [
                                            0  # A list of integers
                                              representing the UTF-8 bytes representation of the token.
                                              Can be null if there is no bytes representation for the
                                              token. Required.
                                        ],
                                        "logprob": 0.0,  # The log
                                          probability of this token, if it is within the top 20 most
                                          likely tokens. Otherwise, the value -9999.0 is used to
                                          signify that the token is very unlikely. Required.
                                        "token": "str",  # The token.
                                          Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  #
                                                      Required.
                                                ],
                                                "logprob": 0.0,  #
                                                  The log probability of this token. Required.
                                                "token": "str"  # The
                                                  token. Required.
                                            }
                                        ]
                                    }
                                ]
                            },
                            "message": {
                                "content": "str",  # The contents of the message.
                                  Required.
                                "reasoning_content": "str",  # The reasoning content
                                  generated by the model. Required.
                                "refusal": "str",  # The refusal message generated by
                                  the model. Required.
                                "role": "str",  # The role of the author of this
                                  message. Required. "assistant"
                                "tool_calls": [
                                    {
                                        "function": {
                                            "arguments": "str",  # The
                                              arguments to call the function with. Required.
                                            "name": "str"  # The name of
                                              the function to call. Required.
                                        },
                                        "id": "str",  # The ID of the tool
                                          call. Required.
                                        "type": "str"  # The type of the
                                          tool. Required. "function"
                                    }
                                ]
                            }
                        }
                    ],
                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
                      was created. Required.
                    "id": "str",  # A unique identifier for the chat completion. Required.
                    "model": "str",  # The model used for the chat completion. Required.
                    "object": "str",  # The object type, which is always chat.completion.
                      Required. "chat.completion"
                    "usage": {
                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
                          prompt tokens written to cache. Required.
                        "cache_creation": {
                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
                              of prompt tokens written to 1h cache. Required.
                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
                              of prompt tokens written to 5m cache. Required.
                        },
                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
                          tokens read from cache. Required.
                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
                          the generated completion. Required.
                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
                          prompt. Required.
                        "total_tokens": 0  # Default value is 0. Total number of tokens used
                          in the request (prompt + completion). Required.
                    }
                }
        """
    @overload
    def create_chat_completion(
        self,
        body: IO[bytes],
        *,
        agent: bool = True,
        content_type: str = "application/json",
        **kwargs: Any,
    ) -> JSON:
        # pylint: disable=line-too-long
        # Typing-only overload: describes the call shape for a pre-serialized binary body.
        # The runtime behaviour lives in the @distributed_trace implementation of the
        # same name, which passes binary bodies through as raw request content.
        """Create a model response for the given chat conversation.

        Creates a model response for the given chat conversation via a customer-provisioned
        agent endpoint.

        :param body: The already-encoded chat completion request as a binary stream. Required.
        :type body: IO[bytes]
        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
        :paramtype agent: bool
        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
         Default value is "application/json".
        :paramtype content_type: str
        :return: JSON object
        :rtype: JSON
        :raises ~azure.core.exceptions.HttpResponseError: If the service answers with any status
         code other than 200.

        Example:
            .. code-block:: python

                # response body for status code(s): 200
                response == {
                    "choices": [
                        {
                            "finish_reason": "str",  # The reason the model stopped
                              generating tokens. stop if the model hit a natural stop point or a
                              provided stop sequence, length if the maximum number of tokens specified
                              in the request was reached, tool_calls if the model called a tool.
                              Required. Known values are: "stop", "length", "tool_calls", and
                              "content_filter".
                            "index": 0,  # The index of the choice in the list of
                              choices. Required.
                            "logprobs": {
                                "content": [
                                    {
                                        "bytes": [
                                            0  # A list of integers
                                              representing the UTF-8 bytes representation of the token.
                                              Can be null if there is no bytes representation for the
                                              token. Required.
                                        ],
                                        "logprob": 0.0,  # The log
                                          probability of this token, if it is within the top 20 most
                                          likely tokens. Otherwise, the value -9999.0 is used to
                                          signify that the token is very unlikely. Required.
                                        "token": "str",  # The token.
                                          Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  #
                                                      Required.
                                                ],
                                                "logprob": 0.0,  #
                                                  The log probability of this token. Required.
                                                "token": "str"  # The
                                                  token. Required.
                                            }
                                        ]
                                    }
                                ],
                                "refusal": [
                                    {
                                        "bytes": [
                                            0  # A list of integers
                                              representing the UTF-8 bytes representation of the token.
                                              Can be null if there is no bytes representation for the
                                              token. Required.
                                        ],
                                        "logprob": 0.0,  # The log
                                          probability of this token, if it is within the top 20 most
                                          likely tokens. Otherwise, the value -9999.0 is used to
                                          signify that the token is very unlikely. Required.
                                        "token": "str",  # The token.
                                          Required.
                                        "top_logprobs": [
                                            {
                                                "bytes": [
                                                    0  #
                                                      Required.
                                                ],
                                                "logprob": 0.0,  #
                                                  The log probability of this token. Required.
                                                "token": "str"  # The
                                                  token. Required.
                                            }
                                        ]
                                    }
                                ]
                            },
                            "message": {
                                "content": "str",  # The contents of the message.
                                  Required.
                                "reasoning_content": "str",  # The reasoning content
                                  generated by the model. Required.
                                "refusal": "str",  # The refusal message generated by
                                  the model. Required.
                                "role": "str",  # The role of the author of this
                                  message. Required. "assistant"
                                "tool_calls": [
                                    {
                                        "function": {
                                            "arguments": "str",  # The
                                              arguments to call the function with. Required.
                                            "name": "str"  # The name of
                                              the function to call. Required.
                                        },
                                        "id": "str",  # The ID of the tool
                                          call. Required.
                                        "type": "str"  # The type of the
                                          tool. Required. "function"
                                    }
                                ]
                            }
                        }
                    ],
                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
                      was created. Required.
                    "id": "str",  # A unique identifier for the chat completion. Required.
                    "model": "str",  # The model used for the chat completion. Required.
                    "object": "str",  # The object type, which is always chat.completion.
                      Required. "chat.completion"
                    "usage": {
                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
                          prompt tokens written to cache. Required.
                        "cache_creation": {
                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
                              of prompt tokens written to 1h cache. Required.
                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
                              of prompt tokens written to 5m cache. Required.
                        },
                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
                          tokens read from cache. Required.
                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
                          the generated completion. Required.
                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
                          prompt. Required.
                        "total_tokens": 0  # Default value is 0. Total number of tokens used
                          in the request (prompt + completion). Required.
                    }
                }
        """
"assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. + "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. Total number of tokens used + in the request (prompt + completion). Required. + } + } + """ + + @distributed_trace + def create_chat_completion( + self, body: Union[JSON, IO[bytes]], *, agent: bool = True, **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a model response for the given chat conversation. + + Creates a model response for the given chat conversation via a customer-provisioned + agent endpoint. + + :param body: Is either a JSON type or a IO[bytes] type. Required. 
+ :type body: JSON or IO[bytes] + :keyword agent: Must be set to true for agent-based completion behavior. Default value is True. + :paramtype agent: bool + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "messages": [ + { + "role": "str", # The role of the message author. Required. + Known values are: "system", "developer", "user", "assistant", and "tool". + "content": "str", # Optional. The contents of the message. + "reasoning_content": "str", # Optional. The reasoning + content generated by the model (assistant messages only). + "refusal": "str", # Optional. The refusal message generated + by the model (assistant messages only). + "tool_call_id": "str", # Optional. Tool call that this + message is responding to (tool messages only). + "tool_calls": [ + { + "function": { + "arguments": "str", # The arguments + to call the function with, as generated by the model in JSON + format. Required. + "name": "str" # The name of the + function to call. Required. + }, + "id": "str", # The ID of the tool call. + Required. + "type": "str" # The type of the tool. + Currently, only function is supported. Required. "function" + } + ] + } + ], + "model": "str", # Model ID used to generate the response. Required. + "frequency_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on their existing frequency in + the text so far, decreasing the model's likelihood to repeat the same line + verbatim. + "logit_bias": { + "str": 0 # Optional. Modify the likelihood of specified tokens + appearing in the completion. Accepts a JSON object that maps tokens + (specified by their token ID in the tokenizer) to an associated bias value + from -100 to 100. Mathematically, the bias is added to the logits generated + by the model prior to sampling. 
The exact effect will vary per model, but + values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + }, + "logprobs": False, # Optional. Default value is False. Whether to return log + probabilities of the output tokens or not. If true, returns the log probabilities + of each output token returned in the content of message. + "max_completion_tokens": 0, # Optional. The maximum number of completion + tokens that may be used over the course of the run. The run will make a best + effort to use only the number of completion tokens specified, across multiple + turns of the run. + "max_tokens": 0, # Optional. The maximum number of tokens that can be + generated in the completion. The token count of your prompt plus max_tokens + cannot exceed the model's context length. + "metadata": { + "str": "str" # Optional. Set of 16 key-value pairs that can be + attached to an object. This can be useful for storing additional information + about the object in a structured format. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters. + }, + "n": 1, # Optional. Default value is 1. How many chat completion choices to + generate for each input message. Note that you will be charged based on the + number of generated tokens across all of the choices. Keep n as 1 to minimize + costs. + "presence_penalty": 0, # Optional. Default value is 0. Number between -2.0 + and 2.0. Positive values penalize new tokens based on whether they appear in the + text so far, increasing the model's likelihood to talk about new topics. + "reasoning_effort": "str", # Optional. Constrains effort on reasoning for + reasoning models. Reducing reasoning effort can result in faster responses and + fewer tokens used on reasoning in a response. Known values are: "none", + "minimal", "low", "medium", "high", and "xhigh". 
+ "seed": 0, # Optional. If specified, the system will make a best effort to + sample deterministically, such that repeated requests with the same seed and + parameters should return the same result. Determinism is not guaranteed. + "stop": {}, + "stream": False, # Optional. Default value is False. If set to true, the + model response data will be streamed to the client as it is generated using + server-sent events. + "stream_options": { + "include_usage": bool # Optional. If set, an additional chunk will + be streamed before the data [DONE] message. The usage field on this chunk + shows the token usage statistics for the entire request, and the choices + field will always be an empty array. + }, + "temperature": 0.0, # Optional. What sampling temperature to use, between 0 + and 2. Higher values like 0.8 will make the output more random, while lower + values like 0.2 will make it more focused and deterministic. We generally + recommend altering this or top_p but not both. + "tool_choice": {}, + "tools": [ + { + "function": { + "name": "str", # The name of the function to be + called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, + with a maximum length of 64. Required. + "description": "str", # Optional. A description of + what the function does, used by the model to choose when and how to + call the function. + "parameters": { + "str": {} # Optional. The parameters the + function accepts, described as a JSON Schema object. + } + }, + "type": "str" # The type of the tool. Currently, only + function is supported. Required. "function" + } + ], + "top_logprobs": 0, # Optional. An integer between 0 and 20 specifying the + number of most likely tokens to return at each token position, each with an + associated log probability. logprobs must be set to true if this parameter is + used. + "top_p": 0.0, # Optional. 
An alternative to sampling with temperature, + called nucleus sampling, where the model considers the results of the tokens with + top_p probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. We generally recommend altering this or + temperature but not both. + "user": "str" # Optional. A unique identifier representing your end-user, + which can help DigitalOcean to monitor and detect abuse. + } + + # response body for status code(s): 200 + response == { + "choices": [ + { + "finish_reason": "str", # The reason the model stopped + generating tokens. stop if the model hit a natural stop point or a + provided stop sequence, length if the maximum number of tokens specified + in the request was reached, tool_calls if the model called a tool. + Required. Known values are: "stop", "length", "tool_calls", and + "content_filter". + "index": 0, # The index of the choice in the list of + choices. Required. + "logprobs": { + "content": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ], + "refusal": [ + { + "bytes": [ + 0 # A list of integers + representing the UTF-8 bytes representation of the token. + Can be null if there is no bytes representation for the + token. Required. + ], + "logprob": 0.0, # The log + probability of this token, if it is within the top 20 most + likely tokens. Otherwise, the value -9999.0 is used to + signify that the token is very unlikely. 
Required. + "token": "str", # The token. + Required. + "top_logprobs": [ + { + "bytes": [ + 0 # + Required. + ], + "logprob": 0.0, # + The log probability of this token. Required. + "token": "str" # The + token. Required. + } + ] + } + ] + }, + "message": { + "content": "str", # The contents of the message. + Required. + "reasoning_content": "str", # The reasoning content + generated by the model. Required. + "refusal": "str", # The refusal message generated by + the model. Required. + "role": "str", # The role of the author of this + message. Required. "assistant" + "tool_calls": [ + { + "function": { + "arguments": "str", # The + arguments to call the function with. Required. + "name": "str" # The name of + the function to call. Required. + }, + "id": "str", # The ID of the tool + call. Required. + "type": "str" # The type of the + tool. Required. "function" + } + ] + } + } + ], + "created": 0, # The Unix timestamp (in seconds) of when the chat completion + was created. Required. + "id": "str", # A unique identifier for the chat completion. Required. + "model": "str", # The model used for the chat completion. Required. + "object": "str", # The object type, which is always chat.completion. + Required. "chat.completion" + "usage": { + "cache_created_input_tokens": 0, # Default value is 0. Number of + prompt tokens written to cache. Required. + "cache_creation": { + "ephemeral_1h_input_tokens": 0, # Default value is 0. Number + of prompt tokens written to 1h cache. Required. + "ephemeral_5m_input_tokens": 0 # Default value is 0. Number + of prompt tokens written to 5m cache. Required. + }, + "cache_read_input_tokens": 0, # Default value is 0. Number of prompt + tokens read from cache. Required. + "completion_tokens": 0, # Default value is 0. Number of tokens in + the generated completion. Required. + "prompt_tokens": 0, # Default value is 0. Number of tokens in the + prompt. Required. + "total_tokens": 0 # Default value is 0. 
Total number of tokens used + in the request (prompt + completion). Required. + } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_agent_inference_create_chat_completion_request( + agent=agent, + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + 
if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore