From 0b6cb154337eeb8641a853bf6ff0b3b884c1c05a Mon Sep 17 00:00:00 2001
From: API Engineering <api-engineering@digitalocean.com>
Date: Wed, 25 Mar 2026 17:05:56 +0000
Subject: [PATCH] [bot] Updated client based on openapi-1e369f0/clientgen

---
 DO_OPENAPI_COMMIT_SHA.txt              |    2 +-
 src/pydo/_client.py                    |   12 +
 src/pydo/aio/_client.py                |   12 +
 src/pydo/aio/operations/__init__.py    |    4 +
 src/pydo/aio/operations/_operations.py | 2661 ++++++++++++++++++++++
 src/pydo/operations/__init__.py        |    4 +
 src/pydo/operations/_operations.py     | 2782 ++++++++++++++++++++++++
 7 files changed, 5476 insertions(+), 1 deletion(-)

diff --git a/DO_OPENAPI_COMMIT_SHA.txt b/DO_OPENAPI_COMMIT_SHA.txt
index 11ac4a0..76cae07 100644
--- a/DO_OPENAPI_COMMIT_SHA.txt
+++ b/DO_OPENAPI_COMMIT_SHA.txt
@@ -1 +1 @@
-ebfa95a
+1e369f0
diff --git a/src/pydo/_client.py b/src/pydo/_client.py
index 0eee1c6..c02a3cc 100644
--- a/src/pydo/_client.py
+++ b/src/pydo/_client.py
@@ -17,6 +17,7 @@
     AccountOperations,
     ActionsOperations,
     AddonsOperations,
+    AgentInferenceOperations,
     AppsOperations,
     AutoscalepoolsOperations,
     BalanceOperations,
@@ -36,6 +37,7 @@
     GenaiOperations,
     ImageActionsOperations,
     ImagesOperations,
+    InferenceOperations,
     InvoicesOperations,
     KubernetesOperations,
     LoadBalancersOperations,
@@ -674,6 +676,10 @@ class GeneratedClient:  # pylint: disable=client-accepts-api-version-keyword,too
     :vartype uptime: pydo.operations.UptimeOperations
     :ivar genai: GenaiOperations operations
     :vartype genai: pydo.operations.GenaiOperations
+    :ivar inference: InferenceOperations operations
+    :vartype inference: pydo.operations.InferenceOperations
+    :ivar agent_inference: AgentInferenceOperations operations
+    :vartype agent_inference: pydo.operations.AgentInferenceOperations
     :param credential: Credential needed for the client to connect to Azure. Required.
     :type credential: ~azure.core.credentials.TokenCredential
     :keyword endpoint: Service URL. Default value is "https://api.digitalocean.com".
@@ -866,6 +872,12 @@ def __init__(
         self.genai = GenaiOperations(
             self._client, self._config, self._serialize, self._deserialize
         )
+        self.inference = InferenceOperations(
+            self._client, self._config, self._serialize, self._deserialize
+        )
+        self.agent_inference = AgentInferenceOperations(
+            self._client, self._config, self._serialize, self._deserialize
+        )
 
     def send_request(
         self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
diff --git a/src/pydo/aio/_client.py b/src/pydo/aio/_client.py
index 64d498b..570fb5d 100644
--- a/src/pydo/aio/_client.py
+++ b/src/pydo/aio/_client.py
@@ -17,6 +17,7 @@
     AccountOperations,
     ActionsOperations,
     AddonsOperations,
+    AgentInferenceOperations,
     AppsOperations,
     AutoscalepoolsOperations,
     BalanceOperations,
@@ -36,6 +37,7 @@
     GenaiOperations,
     ImageActionsOperations,
     ImagesOperations,
+    InferenceOperations,
     InvoicesOperations,
     KubernetesOperations,
     LoadBalancersOperations,
@@ -674,6 +676,10 @@ class GeneratedClient:  # pylint: disable=client-accepts-api-version-keyword,too
     :vartype uptime: pydo.aio.operations.UptimeOperations
     :ivar genai: GenaiOperations operations
     :vartype genai: pydo.aio.operations.GenaiOperations
+    :ivar inference: InferenceOperations operations
+    :vartype inference: pydo.aio.operations.InferenceOperations
+    :ivar agent_inference: AgentInferenceOperations operations
+    :vartype agent_inference: pydo.aio.operations.AgentInferenceOperations
     :param credential: Credential needed for the client to connect to Azure. Required.
     :type credential: ~azure.core.credentials_async.AsyncTokenCredential
     :keyword endpoint: Service URL. Default value is "https://api.digitalocean.com".
@@ -866,6 +872,12 @@ def __init__(
         self.genai = GenaiOperations(
             self._client, self._config, self._serialize, self._deserialize
         )
+        self.inference = InferenceOperations(
+            self._client, self._config, self._serialize, self._deserialize
+        )
+        self.agent_inference = AgentInferenceOperations(
+            self._client, self._config, self._serialize, self._deserialize
+        )
 
     def send_request(
         self, request: HttpRequest, *, stream: bool = False, **kwargs: Any
diff --git a/src/pydo/aio/operations/__init__.py b/src/pydo/aio/operations/__init__.py
index 0c8c048..4286825 100644
--- a/src/pydo/aio/operations/__init__.py
+++ b/src/pydo/aio/operations/__init__.py
@@ -54,6 +54,8 @@
 from ._operations import VpcnatgatewaysOperations
 from ._operations import UptimeOperations
 from ._operations import GenaiOperations
+from ._operations import InferenceOperations
+from ._operations import AgentInferenceOperations
 
 from ._patch import __all__ as _patch_all
 from ._patch import *  # pylint: disable=unused-wildcard-import
@@ -110,6 +112,8 @@
     "VpcnatgatewaysOperations",
     "UptimeOperations",
     "GenaiOperations",
+    "InferenceOperations",
+    "AgentInferenceOperations",
 ]
 __all__.extend([p for p in _patch_all if p not in __all__])
 _patch_sdk()
diff --git a/src/pydo/aio/operations/_operations.py b/src/pydo/aio/operations/_operations.py
index 00b59bc..fee4c4f 100644
--- a/src/pydo/aio/operations/_operations.py
+++ b/src/pydo/aio/operations/_operations.py
@@ -47,6 +47,7 @@
     build_addons_list_request,
     build_addons_patch_plan_request,
     build_addons_patch_request,
+    build_agent_inference_create_chat_completion_request,
     build_apps_assign_alert_destinations_request,
     build_apps_cancel_deployment_request,
     build_apps_cancel_event_request,
@@ -346,6 +347,11 @@
     build_images_get_request,
     build_images_list_request,
     build_images_update_request,
+    build_inference_create_async_invoke_request,
+    build_inference_create_chat_completion_request,
+    build_inference_create_image_request,
+    build_inference_create_response_request,
+    build_inference_list_models_request,
     build_invoices_get_by_uuid_request,
     build_invoices_get_csv_by_uuid_request,
     build_invoices_get_pdf_by_uuid_request,
@@ -252674,3 +252680,2658 @@ async def list_evaluation_test_cases_by_workspace(
             return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
 
         return cast(JSON, deserialized)  # type: ignore
+
+
+class InferenceOperations:
+    """
+    .. warning::
+        **DO NOT** instantiate this class directly.
+
+        Instead, you should access the following operations through
+        :class:`~pydo.aio.GeneratedClient`'s
+        :attr:`inference` attribute.
+    """
+
+    def __init__(self, *args, **kwargs) -> None:
+        input_args = list(args)  # positional: client, config, serializer, deserializer
+        self._client = input_args.pop(0) if input_args else kwargs.pop("client")
+        self._config = input_args.pop(0) if input_args else kwargs.pop("config")
+        self._serialize = input_args.pop(0) if input_args else kwargs.pop("serializer")
+        self._deserialize = (  # next positional, else keyword (KeyError if missing)
+            input_args.pop(0) if input_args else kwargs.pop("deserializer")
+        )
+
+    @overload
+    async def create_chat_completion(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        This overload accepts the request as a JSON object; the template below lists its fields.
+
+        :param body: Chat completion request; see the JSON input template below. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object; see the response example for status code 200 below.
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @overload
+    async def create_chat_completion(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        This overload accepts the request as a binary stream (``IO[bytes]``).
+
+        :param body: Chat completion request supplied as a binary stream. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object; see the response example for status code 200 below.
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @distributed_trace_async
+    async def create_chat_completion(
+        self, body: Union[JSON, IO[bytes]], **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Builds the request from ``body``, runs it with ``stream=False``, and decodes a non-empty response body as JSON.
+
+        :param body: Is either a JSON type or an IO[bytes] type; IOBase/bytes values are passed as raw content. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object, or whatever the ``cls`` callable passed in ``kwargs`` returns.
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: If the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})  # caller-supplied entries override the defaults
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop(
+            "content_type", _headers.pop("Content-Type", None)
+        )
+        cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        content_type = content_type or "application/json"
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):  # raw payload -> content=; anything else -> json=
+            _content = body
+        else:
+            _json = body
+
+        _request = build_inference_create_chat_completion_request(
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False  # this method never asks the pipeline to stream the response
+        pipeline_response: PipelineResponse = (
+            await self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:  # not taken here: _stream is hard-coded to False above
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)
+
+        response_headers = {}  # only consumed by the optional cls callback below
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:  # skip JSON decoding when the body is empty
+            deserialized = response.json()
+        else:
+            deserialized = None
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+    @overload
+    async def create_image(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate images from text prompts.
+
+        Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation
+        model with automatic prompt optimization and enhanced visual capabilities.
+
+        :param body: Request body as a JSON object (see the input template below). Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "model": "str",  # The model to use for image generation. Required.
+                    "n": 0,  # The number of images to generate. Must be between 1 and 10.
+                      Required.
+                    "prompt": "str",  # A text description of the desired image(s). Supports up
+                      to 32,000 characters and provides automatic prompt optimization for best results.
+                      Required.
+                    "background": "str",  # Optional. The background setting for the image
+                      generation. Supported values: transparent, opaque, auto.
+                    "moderation": "str",  # Optional. The moderation setting for the image
+                      generation. Supported values: low, auto.
+                    "output_compression": 0,  # Optional. The output compression level for the
+                      image generation (0-100).
+                    "output_format": "str",  # Optional. The output format for the image
+                      generation. Supported values: png, webp, jpeg.
+                    "partial_images": 0,  # Optional. The number of partial image chunks to
+                      return during streaming generation. Defaults to 0. When stream=true, this must be
+                      greater than 0 to receive progressive updates of the image as it is being
+                      generated.
+                    "quality": "str",  # Optional. The quality of the image that will be
+                      generated. Supported values: auto, high, medium, low.
+                    "size": "str",  # Optional. The size of the generated images. GPT-IMAGE-1
+                      supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536
+                      (portrait). Known values are: "auto", "1536x1024", and "1024x1536".
+                    "stream": False,  # Optional. Default value is False. If set to true, partial
+                      image data will be streamed as the image is being generated. The response will be
+                      sent as server-sent events with partial image chunks. When stream is true,
+                      partial_images must be greater than 0.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the images were
+                      created. Required.
+                    "data": [
+                        {
+                            "b64_json": "str",  # The base64-encoded JSON of the
+                              generated image. Required.
+                            "revised_prompt": "str"  # Optional. The optimized prompt
+                              that was used to generate the image.
+                        }
+                    ],
+                    "background": "str",  # Optional. The background setting used for the image
+                      generation.
+                    "output_format": "str",  # Optional. The output format of the generated
+                      image.
+                    "quality": "str",  # Optional. The quality setting used for the image
+                      generation.
+                    "size": "str",  # Optional. The size of the generated image.
+                    "usage": {
+                        "input_tokens": 0,  # The number of tokens (images and text) in the
+                          input prompt. Required.
+                        "input_tokens_details": {
+                            "image_tokens": 0,  # The number of image tokens in the input
+                              prompt. Required.
+                            "text_tokens": 0  # The number of text tokens in the input
+                              prompt. Required.
+                        },
+                        "output_tokens": 0,  # The number of image tokens in the output
+                          image. Required.
+                        "total_tokens": 0  # The total number of tokens (images and text)
+                          used for the image generation. Required.
+                    }
+                }
+        """
+
+    @overload
+    async def create_image(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        """Generate images from text prompts.
+
+        Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation
+        model with automatic prompt optimization and enhanced visual capabilities.
+
+        :param body: Request body supplied as a binary (IO[bytes]) payload. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the images were
+                      created. Required.
+                    "data": [
+                        {
+                            "b64_json": "str",  # The base64-encoded JSON of the
+                              generated image. Required.
+                            "revised_prompt": "str"  # Optional. The optimized prompt
+                              that was used to generate the image.
+                        }
+                    ],
+                    "background": "str",  # Optional. The background setting used for the image
+                      generation.
+                    "output_format": "str",  # Optional. The output format of the generated
+                      image.
+                    "quality": "str",  # Optional. The quality setting used for the image
+                      generation.
+                    "size": "str",  # Optional. The size of the generated image.
+                    "usage": {
+                        "input_tokens": 0,  # The number of tokens (images and text) in the
+                          input prompt. Required.
+                        "input_tokens_details": {
+                            "image_tokens": 0,  # The number of image tokens in the input
+                              prompt. Required.
+                            "text_tokens": 0  # The number of text tokens in the input
+                              prompt. Required.
+                        },
+                        "output_tokens": 0,  # The number of image tokens in the output
+                          image. Required.
+                        "total_tokens": 0  # The total number of tokens (images and text)
+                          used for the image generation. Required.
+                    }
+                }
+        """
+
+    @distributed_trace_async
+    async def create_image(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate images from text prompts.
+
+        Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation
+        model with automatic prompt optimization and enhanced visual capabilities.
+
+        :param body: Is either a JSON type or a IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "model": "str",  # The model to use for image generation. Required.
+                    "n": 0,  # The number of images to generate. Must be between 1 and 10.
+                      Required.
+                    "prompt": "str",  # A text description of the desired image(s). Supports up
+                      to 32,000 characters and provides automatic prompt optimization for best results.
+                      Required.
+                    "background": "str",  # Optional. The background setting for the image
+                      generation. Supported values: transparent, opaque, auto.
+                    "moderation": "str",  # Optional. The moderation setting for the image
+                      generation. Supported values: low, auto.
+                    "output_compression": 0,  # Optional. The output compression level for the
+                      image generation (0-100).
+                    "output_format": "str",  # Optional. The output format for the image
+                      generation. Supported values: png, webp, jpeg.
+                    "partial_images": 0,  # Optional. The number of partial image chunks to
+                      return during streaming generation. Defaults to 0. When stream=true, this must be
+                      greater than 0 to receive progressive updates of the image as it is being
+                      generated.
+                    "quality": "str",  # Optional. The quality of the image that will be
+                      generated. Supported values: auto, high, medium, low.
+                    "size": "str",  # Optional. The size of the generated images. GPT-IMAGE-1
+                      supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536
+                      (portrait). Known values are: "auto", "1536x1024", and "1024x1536".
+                    "stream": False,  # Optional. Default value is False. If set to true, partial
+                      image data will be streamed as the image is being generated. The response will be
+                      sent as server-sent events with partial image chunks. When stream is true,
+                      partial_images must be greater than 0.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the images were
+                      created. Required.
+                    "data": [
+                        {
+                            "b64_json": "str",  # The base64-encoded JSON of the
+                              generated image. Required.
+                            "revised_prompt": "str"  # Optional. The optimized prompt
+                              that was used to generate the image.
+                        }
+                    ],
+                    "background": "str",  # Optional. The background setting used for the image
+                      generation.
+                    "output_format": "str",  # Optional. The output format of the generated
+                      image.
+                    "quality": "str",  # Optional. The quality setting used for the image
+                      generation.
+                    "size": "str",  # Optional. The size of the generated image.
+                    "usage": {
+                        "input_tokens": 0,  # The number of tokens (images and text) in the
+                          input prompt. Required.
+                        "input_tokens_details": {
+                            "image_tokens": 0,  # The number of image tokens in the input
+                              prompt. Required.
+                            "text_tokens": 0  # The number of text tokens in the input
+                              prompt. Required.
+                        },
+                        "output_tokens": 0,  # The number of image tokens in the output
+                          image. Required.
+                        "total_tokens": 0  # The total number of tokens (images and text)
+                          used for the image generation. Required.
+                    }
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop(
+            "content_type", _headers.pop("Content-Type", None)
+        )
+        cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        content_type = content_type or "application/json"
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _json = body
+
+        _request = build_inference_create_image_request(
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False
+        pipeline_response: PipelineResponse = (
+            await self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)
+
+        response_headers = {}
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:
+            deserialized = response.json()
+        else:
+            deserialized = None
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+    @distributed_trace_async
+    async def list_models(self, **kwargs: Any) -> JSON:
+        """List available models.
+
+        Lists the currently available models, and provides basic information about each one such as the
+        owner and availability.
+
+        :return: JSON object (None when the response body is empty), or the result of ``cls`` if given
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: if the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "data": [
+                        {
+                            "created": 0,  # The Unix timestamp (in seconds) when the
+                              model was created. Required.
+                            "id": "str",  # The model identifier, which can be referenced
+                              in the API endpoints. Required.
+                            "object": "str",  # The object type, which is always "model".
+                              Required. "model"
+                            "owned_by": "str"  # The organization that owns the model.
+                              Required.
+                        }
+                    ],
+                    "object": "str"  # The object type, which is always "list". Required. "list"
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})  # caller-supplied entries override the defaults
+
+        _headers = kwargs.pop("headers", {}) or {}
+        _params = kwargs.pop("params", {}) or {}
+
+        cls: ClsType[JSON] = kwargs.pop("cls", None)  # optional hook that builds the return value
+
+        _request = build_inference_list_models_request(
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False  # hard-coded, so the ``if _stream`` branch below never runs
+        pipeline_response: PipelineResponse = (
+            await self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)  # reached only if map_error did not raise
+
+        response_headers = {}  # rate-limit headers; only passed on to ``cls`` below
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:
+            deserialized = response.json()
+        else:
+            deserialized = None  # empty body: nothing to parse
+
+        if cls:  # the hook's result replaces the default return value
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+    @overload
+    async def create_response(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Send Prompt to a Model Using the Responses API.
+
+        Generate text responses from text prompts. This endpoint supports both streaming and
+        non-streaming responses for supported text models.
+
+        :param body: The request payload, shaped like the ``body`` template below. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {},
+                    "model": "str",  # The model ID of the model you want to use. Get the model
+                      ID using ``/v1/models`` or on the available models page. Required.
+                    "instructions": "str",  # Optional. System-level instructions for the model.
+                      This sets the behavior and context for the response generation.
+                    "max_output_tokens": 0,  # Optional. The maximum number of tokens to generate
+                      in the response.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of key-value pairs that can be attached
+                          to the request.
+                    },
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. Set to true to stream
+                      partial responses as Server-Sent Events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data: [DONE] message with token usage statistics for
+                          the entire request.
+                    },
+                    "temperature": 0.0,  # Optional. A value between 0.0 and 2.0 to control
+                      randomness and creativity. Lower values like 0.2 make the output more focused and
+                      deterministic, while higher values like 0.8 make it more random.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function to be
+                              called.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts, described as a JSON Schema object.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass.
+                    "user": "str"  # Optional. A unique identifier representing your end-user.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the response was
+                      created. Required.
+                    "id": "str",  # A unique identifier for the response. Required.
+                    "model": "str",  # The model used to generate the response. Required.
+                    "object": "str",  # The object type, which is always ``response``. Required.
+                      "response"
+                    "output": [
+                        {
+                            "content": [
+                                {
+                                    "text": "str",  # The text content. Required.
+                                    "type": "str"  # The type of content part.
+                                      ``reasoning_text`` for reasoning content, ``output_text`` for
+                                      final output text. Required. Known values are: "reasoning_text"
+                                      and "output_text".
+                                }
+                            ],
+                            "type": "str",  # The type of output item. One of
+                              ``reasoning``, ``message``, or ``function_call``. Required. Known
+                              values are: "reasoning", "message", and "function_call".
+                            "arguments": "str",  # Optional. JSON string of function
+                              arguments (present when type is ``function_call``).
+                            "call_id": "str",  # Optional. The unique ID of the function
+                              tool call (present when type is ``function_call``).
+                            "id": "str",  # Optional. The unique ID of the output item.
+                            "name": "str",  # Optional. The name of the function to call
+                              (present when type is ``function_call``).
+                            "role": "str",  # Optional. The role associated with this
+                              output item (typically ``assistant``).
+                            "status": "str"  # Optional. Status of the item.
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 0,  # The number of input tokens. Required.
+                        "input_tokens_details": {
+                            "cached_tokens": 0  # The number of tokens that were
+                              retrieved from the cache. Required.
+                        },
+                        "output_tokens": 0,  # The number of output tokens. Required.
+                        "output_tokens_details": {
+                            "reasoning_tokens": 0,  # The number of reasoning tokens.
+                              Required.
+                            "tool_output_tokens": 0  # The number of tool output tokens.
+                              Required.
+                        },
+                        "total_tokens": 0  # The total number of tokens used. Required.
+                    },
+                    "max_output_tokens": 0,  # Optional. Maximum output tokens setting.
+                    "parallel_tool_calls": bool,  # Optional. Whether parallel tool calls are
+                      enabled.
+                    "status": "str",  # Optional. Status of the response.
+                    "temperature": 0.0,  # Optional. Temperature setting used for the response.
+                    "tool_choice": "str",  # Optional. Tool choice setting used for the response.
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. Top-p setting used for the response.
+                    "user": "str"  # Optional. User identifier.
+                }
+        """
+
+    @overload
+    async def create_response(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Send Prompt to a Model Using the Responses API.
+
+        Generate text responses from text prompts. This endpoint supports both streaming and
+        non-streaming responses for supported text models.
+
+        :param body: The request payload, supplied as a binary stream. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the response was
+                      created. Required.
+                    "id": "str",  # A unique identifier for the response. Required.
+                    "model": "str",  # The model used to generate the response. Required.
+                    "object": "str",  # The object type, which is always ``response``. Required.
+                      "response"
+                    "output": [
+                        {
+                            "content": [
+                                {
+                                    "text": "str",  # The text content. Required.
+                                    "type": "str"  # The type of content part.
+                                      ``reasoning_text`` for reasoning content, ``output_text`` for
+                                      final output text. Required. Known values are: "reasoning_text"
+                                      and "output_text".
+                                }
+                            ],
+                            "type": "str",  # The type of output item. One of
+                              ``reasoning``, ``message``, or ``function_call``. Required. Known
+                              values are: "reasoning", "message", and "function_call".
+                            "arguments": "str",  # Optional. JSON string of function
+                              arguments (present when type is ``function_call``).
+                            "call_id": "str",  # Optional. The unique ID of the function
+                              tool call (present when type is ``function_call``).
+                            "id": "str",  # Optional. The unique ID of the output item.
+                            "name": "str",  # Optional. The name of the function to call
+                              (present when type is ``function_call``).
+                            "role": "str",  # Optional. The role associated with this
+                              output item (typically ``assistant``).
+                            "status": "str"  # Optional. Status of the item.
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 0,  # The number of input tokens. Required.
+                        "input_tokens_details": {
+                            "cached_tokens": 0  # The number of tokens that were
+                              retrieved from the cache. Required.
+                        },
+                        "output_tokens": 0,  # The number of output tokens. Required.
+                        "output_tokens_details": {
+                            "reasoning_tokens": 0,  # The number of reasoning tokens.
+                              Required.
+                            "tool_output_tokens": 0  # The number of tool output tokens.
+                              Required.
+                        },
+                        "total_tokens": 0  # The total number of tokens used. Required.
+                    },
+                    "max_output_tokens": 0,  # Optional. Maximum output tokens setting.
+                    "parallel_tool_calls": bool,  # Optional. Whether parallel tool calls are
+                      enabled.
+                    "status": "str",  # Optional. Status of the response.
+                    "temperature": 0.0,  # Optional. Temperature setting used for the response.
+                    "tool_choice": "str",  # Optional. Tool choice setting used for the response.
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. Top-p setting used for the response.
+                    "user": "str"  # Optional. User identifier.
+                }
+        """
+
+    @distributed_trace_async
+    async def create_response(
+        self, body: Union[JSON, IO[bytes]], **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Send Prompt to a Model Using the Responses API.
+
+        Generate text responses from text prompts. This endpoint supports both streaming and
+        non-streaming responses for supported text models.
+
+        :param body: Is either a JSON type or a IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object (None when the response body is empty), or the result of ``cls`` if given
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: if the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {},
+                    "model": "str",  # The model ID of the model you want to use. Get the model
+                      ID using ``/v1/models`` or on the available models page. Required.
+                    "instructions": "str",  # Optional. System-level instructions for the model.
+                      This sets the behavior and context for the response generation.
+                    "max_output_tokens": 0,  # Optional. The maximum number of tokens to generate
+                      in the response.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of key-value pairs that can be attached
+                          to the request.
+                    },
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. Set to true to stream
+                      partial responses as Server-Sent Events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data: [DONE] message with token usage statistics for
+                          the entire request.
+                    },
+                    "temperature": 0.0,  # Optional. A value between 0.0 and 2.0 to control
+                      randomness and creativity. Lower values like 0.2 make the output more focused and
+                      deterministic, while higher values like 0.8 make it more random.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function to be
+                              called.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts, described as a JSON Schema object.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass.
+                    "user": "str"  # Optional. A unique identifier representing your end-user.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the response was
+                      created. Required.
+                    "id": "str",  # A unique identifier for the response. Required.
+                    "model": "str",  # The model used to generate the response. Required.
+                    "object": "str",  # The object type, which is always ``response``. Required.
+                      "response"
+                    "output": [
+                        {
+                            "content": [
+                                {
+                                    "text": "str",  # The text content. Required.
+                                    "type": "str"  # The type of content part.
+                                      ``reasoning_text`` for reasoning content, ``output_text`` for
+                                      final output text. Required. Known values are: "reasoning_text"
+                                      and "output_text".
+                                }
+                            ],
+                            "type": "str",  # The type of output item. One of
+                              ``reasoning``, ``message``, or ``function_call``. Required. Known
+                              values are: "reasoning", "message", and "function_call".
+                            "arguments": "str",  # Optional. JSON string of function
+                              arguments (present when type is ``function_call``).
+                            "call_id": "str",  # Optional. The unique ID of the function
+                              tool call (present when type is ``function_call``).
+                            "id": "str",  # Optional. The unique ID of the output item.
+                            "name": "str",  # Optional. The name of the function to call
+                              (present when type is ``function_call``).
+                            "role": "str",  # Optional. The role associated with this
+                              output item (typically ``assistant``).
+                            "status": "str"  # Optional. Status of the item.
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 0,  # The number of input tokens. Required.
+                        "input_tokens_details": {
+                            "cached_tokens": 0  # The number of tokens that were
+                              retrieved from the cache. Required.
+                        },
+                        "output_tokens": 0,  # The number of output tokens. Required.
+                        "output_tokens_details": {
+                            "reasoning_tokens": 0,  # The number of reasoning tokens.
+                              Required.
+                            "tool_output_tokens": 0  # The number of tool output tokens.
+                              Required.
+                        },
+                        "total_tokens": 0  # The total number of tokens used. Required.
+                    },
+                    "max_output_tokens": 0,  # Optional. Maximum output tokens setting.
+                    "parallel_tool_calls": bool,  # Optional. Whether parallel tool calls are
+                      enabled.
+                    "status": "str",  # Optional. Status of the response.
+                    "temperature": 0.0,  # Optional. Temperature setting used for the response.
+                    "tool_choice": "str",  # Optional. Tool choice setting used for the response.
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. Top-p setting used for the response.
+                    "user": "str"  # Optional. User identifier.
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})  # caller-supplied entries override the defaults
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop(
+            "content_type", _headers.pop("Content-Type", None)
+        )  # the keyword argument wins; the Content-Type header is popped either way
+        cls: ClsType[JSON] = kwargs.pop("cls", None)  # optional hook that builds the return value
+
+        content_type = content_type or "application/json"  # default when none (or an empty one) was given
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body  # stream or raw bytes: forwarded through content=
+        else:
+            _json = body  # anything else: forwarded through json=
+
+        _request = build_inference_create_response_request(
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False  # NOTE(review): hard-coded; a "stream": True body is presumably unsupported here - confirm
+        pipeline_response: PipelineResponse = (
+            await self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)  # reached only if map_error did not raise
+
+        response_headers = {}  # rate-limit headers; only passed on to ``cls`` below
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:
+            deserialized = response.json()
+        else:
+            deserialized = None  # empty body: nothing to parse
+
+        if cls:  # the hook's result replaces the default return value
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+    @overload
+    async def create_async_invoke(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate Image, Audio, or Text-to-Speech Using fal Models.
+
+        Typing overload for a JSON request body. This endpoint starts an asynchronous job and
+        returns a request_id. The job status is QUEUED initially. Use the request_id to poll
+        for the result.
+
+        :param body: Request payload as a JSON object; see the input template below. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {
+                        "enable_safety_checker": bool,  # Optional. Whether to enable the
+                          safety checker for generated content.
+                        "guidance_scale": 0.0,  # Optional. Controls how closely the image
+                          generation model follows the prompt. Higher values produce output more
+                          closely matching the prompt.
+                        "num_images": 0,  # Optional. The number of images to generate.
+                        "num_inference_steps": 0,  # Optional. The number of inference steps
+                          to use during image generation. More steps generally produce higher quality
+                          output but take longer.
+                        "output_format": "str",  # Optional. The desired output format or
+                          aspect ratio for image generation.
+                        "prompt": "str",  # Optional. The text prompt describing the desired
+                          output. Used for image generation and audio generation models.
+                        "seconds_total": 0,  # Optional. The total duration in seconds for
+                          generated audio. Used for audio generation models.
+                        "text": "str"  # Optional. The text content to convert to speech.
+                          Used for text-to-speech models.
+                    },
+                    "model_id": "str",  # The ID of the model to invoke asynchronously. Required.
+                    "tags": [
+                        {
+                            "key": "str",  # The tag key. Required.
+                            "value": "str"  # The tag value. Required.
+                        }
+                    ]
+                }
+
+                # response body for status code(s): 202
+                response == {
+                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
+                      created. Required.
+                    "model_id": "str",  # The model ID that was invoked. Required.
+                    "request_id": "str",  # A unique identifier for the async invocation request.
+                      Use this ID to check the status and retrieve the result. Required.
+                    "status": "str",  # The current status of the async invocation. Required.
+                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
+                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
+                      job completed. Null until finished.
+                    "error": "str",  # Optional. Error message if the job failed. Null on
+                      success.
+                    "output": {
+                        "str": {}  # Optional. The output of the invocation. Null while the
+                          job is queued or in progress. Contains the result once completed.
+                    },
+                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
+                      started processing. Null while queued.
+                }
+        """
+
+    @overload
+    async def create_async_invoke(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate Image, Audio, or Text-to-Speech Using fal Models.
+
+        Typing overload for a binary (IO[bytes]) request body. This endpoint starts an asynchronous
+        job and returns a request_id. The job status is QUEUED initially. Use the request_id to poll
+        for the result.
+
+        :param body: Request payload as a binary stream. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 202
+                response == {
+                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
+                      created. Required.
+                    "model_id": "str",  # The model ID that was invoked. Required.
+                    "request_id": "str",  # A unique identifier for the async invocation request.
+                      Use this ID to check the status and retrieve the result. Required.
+                    "status": "str",  # The current status of the async invocation. Required.
+                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
+                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
+                      job completed. Null until finished.
+                    "error": "str",  # Optional. Error message if the job failed. Null on
+                      success.
+                    "output": {
+                        "str": {}  # Optional. The output of the invocation. Null while the
+                          job is queued or in progress. Contains the result once completed.
+                    },
+                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
+                      started processing. Null while queued.
+                }
+        """
+
+    @distributed_trace_async
+    async def create_async_invoke(
+        self, body: Union[JSON, IO[bytes]], **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate Image, Audio, or Text-to-Speech Using fal Models.
+
+        Starts an asynchronous job and returns a request_id. The job status is QUEUED initially.
+        Use the request_id to poll for the result. Any response status other than 202 raises
+        an error.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {
+                        "enable_safety_checker": bool,  # Optional. Whether to enable the
+                          safety checker for generated content.
+                        "guidance_scale": 0.0,  # Optional. Controls how closely the image
+                          generation model follows the prompt. Higher values produce output more
+                          closely matching the prompt.
+                        "num_images": 0,  # Optional. The number of images to generate.
+                        "num_inference_steps": 0,  # Optional. The number of inference steps
+                          to use during image generation. More steps generally produce higher quality
+                          output but take longer.
+                        "output_format": "str",  # Optional. The desired output format or
+                          aspect ratio for image generation.
+                        "prompt": "str",  # Optional. The text prompt describing the desired
+                          output. Used for image generation and audio generation models.
+                        "seconds_total": 0,  # Optional. The total duration in seconds for
+                          generated audio. Used for audio generation models.
+                        "text": "str"  # Optional. The text content to convert to speech.
+                          Used for text-to-speech models.
+                    },
+                    "model_id": "str",  # The ID of the model to invoke asynchronously. Required.
+                    "tags": [
+                        {
+                            "key": "str",  # The tag key. Required.
+                            "value": "str"  # The tag value. Required.
+                        }
+                    ]
+                }
+
+                # response body for status code(s): 202
+                response == {
+                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
+                      created. Required.
+                    "model_id": "str",  # The model ID that was invoked. Required.
+                    "request_id": "str",  # A unique identifier for the async invocation request.
+                      Use this ID to check the status and retrieve the result. Required.
+                    "status": "str",  # The current status of the async invocation. Required.
+                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
+                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
+                      job completed. Null until finished.
+                    "error": "str",  # Optional. Error message if the job failed. Null on
+                      success.
+                    "output": {
+                        "str": {}  # Optional. The output of the invocation. Null while the
+                          job is queued or in progress. Contains the result once completed.
+                    },
+                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
+                      started processing. Null while queued.
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})  # caller entries override the defaults
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop(
+            "content_type", _headers.pop("Content-Type", None)  # header is removed; used as fallback
+        )
+        cls: ClsType[JSON] = kwargs.pop("cls", None)  # optional callback that shapes the return value
+
+        content_type = content_type or "application/json"  # default when neither source set it
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):  # streams/bytes are passed as `content`, not `json`
+            _content = body
+        else:
+            _json = body
+
+        _request = build_inference_create_async_invoke_request(
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False  # always False here, so the `if _stream` read below is never taken
+        pipeline_response: PipelineResponse = (
+            await self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [202]:  # 202 is the only status treated as success
+            if _stream:
+                await response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)  # reached only if map_error did not raise
+
+        response_headers = {}  # only handed to `cls`; not part of the plain return value
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:
+            deserialized = response.json()
+        else:
+            deserialized = None  # empty body: return None instead of parsing
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+
+class AgentInferenceOperations:
+    """
+    .. warning::
+        **DO NOT** instantiate this class directly.
+
+        Instead, you should access the following operations through
+        :class:`~pydo.aio.GeneratedClient`'s
+        :attr:`agent_inference` attribute.
+    """
+
+    def __init__(self, *args, **kwargs) -> None:  # positional order: client, config, serializer, deserializer
+        input_args = list(args)  # mutable copy so positionals can be consumed front to back
+        self._client = input_args.pop(0) if input_args else kwargs.pop("client")  # else keyword; KeyError if absent
+        self._config = input_args.pop(0) if input_args else kwargs.pop("config")
+        self._serialize = input_args.pop(0) if input_args else kwargs.pop("serializer")
+        self._deserialize = (
+            input_args.pop(0) if input_args else kwargs.pop("deserializer")
+        )
+
+    @overload
+    async def create_chat_completion(
+        self,
+        body: JSON,
+        *,
+        agent: bool = True,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Typing overload for a JSON request body. Creates a model response for the given chat
+        conversation via a customer-provisioned agent endpoint.
+
+        :param body: Request payload as a JSON object; see the input template below. Required.
+        :type body: JSON
+        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
+        :paramtype agent: bool
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @overload
+    async def create_chat_completion(
+        self,
+        body: IO[bytes],
+        *,
+        agent: bool = True,
+        content_type: str = "application/json",
+        **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Creates a model response for the given chat conversation via a customer-provisioned
+        agent endpoint.
+
+        :param body: Request body supplied as a binary stream (see ``content_type``). Required.
+        :type body: IO[bytes]
+        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
+        :paramtype agent: bool
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object shaped like the response template shown below
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: if the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @distributed_trace_async
+    async def create_chat_completion(
+        self, body: Union[JSON, IO[bytes]], *, agent: bool = True, **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Sends the conversation to a customer-provisioned agent endpoint and returns the
+        model response it generates.
+
+        :param body: Request payload, given either as JSON or as an IO[bytes] stream. Required.
+        :type body: JSON or IO[bytes]
+        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
+        :paramtype agent: bool
+        :return: JSON object parsed from the response body (None when the body is empty)
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: if the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+        # Optional callback; when given, its result replaces the return value.
+        cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        # Default status-code -> exception mapping; caller entries override it.
+        status_errors: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        status_errors.update(kwargs.pop("error_map", {}) or {})
+
+        request_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        query_params = kwargs.pop("params", {}) or {}
+
+        # An explicit ``content_type`` keyword takes precedence over a Content-Type
+        # header; the header entry is popped either way, and JSON is the fallback.
+        header_content_type = request_headers.pop("Content-Type", None)
+        content_type: Optional[str] = (
+            kwargs.pop("content_type", header_content_type) or "application/json"
+        )
+
+        # Streams and raw bytes are sent untouched; any other body is sent as JSON.
+        is_raw_body = isinstance(body, (IOBase, bytes))
+        _request = build_agent_inference_create_chat_completion_request(
+            agent=agent,
+            content_type=content_type,
+            json=None if is_raw_body else body,
+            content=body if is_raw_body else None,
+            headers=request_headers,
+            params=query_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        # The response is always read in full: streaming is never requested here.
+        _stream = False
+        pipeline_response: PipelineResponse = (
+            await self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+        http_response = pipeline_response.http_response
+
+        # Any status other than 200 is an error: map_error gets first chance to
+        # raise a mapped exception, then the generic HttpResponseError is raised.
+        if http_response.status_code != 200:
+            if _stream:
+                await http_response.read()  # Load the body in memory and close the socket
+            map_error(  # type: ignore
+                status_code=http_response.status_code,
+                response=http_response,
+                error_map=status_errors,
+            )
+            raise HttpResponseError(response=http_response)
+
+        # Collect the rate-limit headers, passed through the "int" deserializer.
+        response_headers = {
+            name: self._deserialize("int", http_response.headers.get(name))
+            for name in ("ratelimit-limit", "ratelimit-remaining", "ratelimit-reset")
+        }
+
+        # An empty response body yields None rather than a JSON parse.
+        deserialized = http_response.json() if http_response.content else None
+        result = cast(JSON, deserialized)
+
+        if cls:
+            return cls(pipeline_response, result, response_headers)  # type: ignore
+
+        return result  # type: ignore
diff --git a/src/pydo/operations/__init__.py b/src/pydo/operations/__init__.py
index 0c8c048..4286825 100644
--- a/src/pydo/operations/__init__.py
+++ b/src/pydo/operations/__init__.py
@@ -54,6 +54,8 @@
 from ._operations import VpcnatgatewaysOperations
 from ._operations import UptimeOperations
 from ._operations import GenaiOperations
+from ._operations import InferenceOperations
+from ._operations import AgentInferenceOperations
 
 from ._patch import __all__ as _patch_all
 from ._patch import *  # pylint: disable=unused-wildcard-import
@@ -110,6 +112,8 @@
     "VpcnatgatewaysOperations",
     "UptimeOperations",
     "GenaiOperations",
+    "InferenceOperations",
+    "AgentInferenceOperations",
 ]
 __all__.extend([p for p in _patch_all if p not in __all__])
 _patch_sdk()
diff --git a/src/pydo/operations/_operations.py b/src/pydo/operations/_operations.py
index 4795b9e..b466b61 100644
--- a/src/pydo/operations/_operations.py
+++ b/src/pydo/operations/_operations.py
@@ -14644,6 +14644,137 @@ def build_genai_list_evaluation_test_cases_by_workspace_request(  # pylint: disa
     return HttpRequest(method="GET", url=_url, headers=_headers, **kwargs)
 
 
+def build_inference_create_chat_completion_request(
+    **kwargs: Any,
+) -> HttpRequest:  # pylint: disable=name-too-long
+    """Build the ``POST /v1/chat/completions`` request.
+
+    ``headers`` and ``content_type`` are consumed; other kwargs reach HttpRequest.
+    """
+    headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+
+    # The header is popped even when the keyword is given, so the keyword wins.
+    fallback_type = headers.pop("Content-Type", None)
+    content_type: Optional[str] = kwargs.pop("content_type", fallback_type)
+    accept = headers.pop("Accept", "application/json")
+
+    if content_type is not None:
+        serialized = _SERIALIZER.header("content_type", content_type, "str")
+        headers["Content-Type"] = serialized
+    headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    url = "/v1/chat/completions"
+    return HttpRequest(method="POST", url=url, headers=headers, **kwargs)
+
+
+def build_inference_create_image_request(**kwargs: Any) -> HttpRequest:
+    """Build the ``POST /v1/images/generations`` request.
+
+    ``headers`` and ``content_type`` are consumed; other kwargs reach HttpRequest.
+    """
+    headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+
+    # The header is popped even when the keyword is given, so the keyword wins.
+    fallback_type = headers.pop("Content-Type", None)
+    content_type: Optional[str] = kwargs.pop("content_type", fallback_type)
+    accept = headers.pop("Accept", "application/json")
+
+    if content_type is not None:
+        serialized = _SERIALIZER.header("content_type", content_type, "str")
+        headers["Content-Type"] = serialized
+    headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    url = "/v1/images/generations"
+    return HttpRequest(method="POST", url=url, headers=headers, **kwargs)
+
+
+def build_inference_list_models_request(**kwargs: Any) -> HttpRequest:
+    """Build the ``GET /v1/models`` request.
+
+    Only ``headers`` is consumed here; other kwargs reach HttpRequest.
+    """
+    headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    accept = headers.pop("Accept", "application/json")
+
+    headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    url = "/v1/models"
+    return HttpRequest(method="GET", url=url, headers=headers, **kwargs)
+
+
+def build_inference_create_response_request(**kwargs: Any) -> HttpRequest:
+    """Build the ``POST /v1/responses`` request.
+
+    ``headers`` and ``content_type`` are consumed; other kwargs reach HttpRequest.
+    """
+    headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+
+    # The header is popped even when the keyword is given, so the keyword wins.
+    fallback_type = headers.pop("Content-Type", None)
+    content_type: Optional[str] = kwargs.pop("content_type", fallback_type)
+    accept = headers.pop("Accept", "application/json")
+
+    if content_type is not None:
+        serialized = _SERIALIZER.header("content_type", content_type, "str")
+        headers["Content-Type"] = serialized
+    headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    url = "/v1/responses"
+    return HttpRequest(method="POST", url=url, headers=headers, **kwargs)
+
+
+def build_inference_create_async_invoke_request(
+    **kwargs: Any,
+) -> HttpRequest:  # pylint: disable=name-too-long
+    """Build the ``POST /v1/async-invoke`` request.
+
+    ``headers`` and ``content_type`` are consumed; other kwargs reach HttpRequest.
+    """
+    headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+
+    # The header is popped even when the keyword is given, so the keyword wins.
+    fallback_type = headers.pop("Content-Type", None)
+    content_type: Optional[str] = kwargs.pop("content_type", fallback_type)
+    accept = headers.pop("Accept", "application/json")
+
+    if content_type is not None:
+        serialized = _SERIALIZER.header("content_type", content_type, "str")
+        headers["Content-Type"] = serialized
+    headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    url = "/v1/async-invoke"
+    return HttpRequest(method="POST", url=url, headers=headers, **kwargs)
+
+
+def build_agent_inference_create_chat_completion_request(  # pylint: disable=name-too-long
+    *, agent: bool = True, **kwargs: Any
+) -> HttpRequest:
+    """Build the ``POST /api/v1/chat/completions`` request.
+
+    ``agent`` is sent as a query parameter; ``headers``, ``params`` and
+    ``content_type`` are consumed from kwargs.
+    """
+    headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+    params = case_insensitive_dict(kwargs.pop("params", {}) or {})
+
+    # The header is popped even when the keyword is given, so the keyword wins.
+    fallback_type = headers.pop("Content-Type", None)
+    content_type: Optional[str] = kwargs.pop("content_type", fallback_type)
+    accept = headers.pop("Accept", "application/json")
+
+    params["agent"] = _SERIALIZER.query("agent", agent, "bool")
+
+    if content_type is not None:
+        serialized = _SERIALIZER.header("content_type", content_type, "str")
+        headers["Content-Type"] = serialized
+    headers["Accept"] = _SERIALIZER.header("accept", accept, "str")
+
+    url = "/api/v1/chat/completions"
+    return HttpRequest(
+        method="POST", url=url, params=params, headers=headers, **kwargs
+    )
+
+
 class OneClicksOperations:
     """
     .. warning::
@@ -266623,3 +266754,2654 @@ def list_evaluation_test_cases_by_workspace(
             return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
 
         return cast(JSON, deserialized)  # type: ignore
+
+
+class InferenceOperations:
+    """
+    .. warning::
+        **DO NOT** instantiate this class directly.
+
+        Instead, you should access the following operations through
+        :class:`~pydo.GeneratedClient`'s
+        :attr:`inference` attribute.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Bind client, config, serializer, deserializer from args or kwargs."""
+        # Slot i uses args[i] when supplied; otherwise its keyword is required.
+        given = len(args)
+        self._client = args[0] if given > 0 else kwargs.pop("client")
+        self._config = args[1] if given > 1 else kwargs.pop("config")
+        self._serialize = args[2] if given > 2 else kwargs.pop("serializer")
+        self._deserialize = args[3] if given > 3 else kwargs.pop("deserializer")
+
+    @overload
+    def create_chat_completion(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Creates a model response for the given chat conversation.
+
+        :param body: Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @overload
+    def create_chat_completion(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Creates a model response for the given chat conversation.
+
+        :param body: Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @distributed_trace
+    def create_chat_completion(
+        self, body: Union[JSON, IO[bytes]], **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        The request is sent without streaming; any status other than 200 raises, and an empty response body yields None.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object, or whatever ``cls`` returns when that keyword is supplied
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {  # HTTP status -> error type handed to map_error
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})  # caller-supplied entries override the defaults
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})  # optional "headers" kwarg
+        _params = kwargs.pop("params", {}) or {}  # optional "params" kwarg
+
+        content_type: Optional[str] = kwargs.pop(  # explicit kwarg wins over a "Content-Type" header
+            "content_type", _headers.pop("Content-Type", None)  # header is popped even when the kwarg is given
+        )
+        cls: ClsType[JSON] = kwargs.pop("cls", None)  # optional callback applied to the result below
+
+        content_type = content_type or "application/json"  # default when nothing (or a falsy value) was supplied
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):  # raw payload: forwarded as request content
+            _content = body
+        else:
+            _json = body  # anything else is handed to the request builder as JSON
+
+        _request = build_inference_create_chat_completion_request(
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False  # fixed: the pipeline is always run with stream=False
+        pipeline_response: PipelineResponse = (
+            self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:  # only 200 is treated as success
+            if _stream:  # never true while _stream is hard-coded to False above
+                response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)  # reached only if map_error did not raise
+
+        response_headers = {}  # rate-limit headers; only passed on to ``cls``
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:
+            deserialized = response.json()
+        else:
+            deserialized = None  # empty body: nothing to decode
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+    @overload
+    def create_image(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate images from text prompts.
+
+        Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation
+        model with automatic prompt optimization and enhanced visual capabilities.
+
+        :param body: Image generation request as a JSON object; see the input template below. Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "model": "str",  # The model to use for image generation. Required.
+                    "n": 0,  # The number of images to generate. Must be between 1 and 10.
+                      Required.
+                    "prompt": "str",  # A text description of the desired image(s). Supports up
+                      to 32,000 characters and provides automatic prompt optimization for best results.
+                      Required.
+                    "background": "str",  # Optional. The background setting for the image
+                      generation. Supported values: transparent, opaque, auto.
+                    "moderation": "str",  # Optional. The moderation setting for the image
+                      generation. Supported values: low, auto.
+                    "output_compression": 0,  # Optional. The output compression level for the
+                      image generation (0-100).
+                    "output_format": "str",  # Optional. The output format for the image
+                      generation. Supported values: png, webp, jpeg.
+                    "partial_images": 0,  # Optional. The number of partial image chunks to
+                      return during streaming generation. Defaults to 0. When stream=true, this must be
+                      greater than 0 to receive progressive updates of the image as it is being
+                      generated.
+                    "quality": "str",  # Optional. The quality of the image that will be
+                      generated. Supported values: auto, high, medium, low.
+                    "size": "str",  # Optional. The size of the generated images. GPT-IMAGE-1
+                      supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536
+                      (portrait). Known values are: "auto", "1536x1024", and "1024x1536".
+                    "stream": False,  # Optional. Default value is False. If set to true, partial
+                      image data will be streamed as the image is being generated. The response will be
+                      sent as server-sent events with partial image chunks. When stream is true,
+                      partial_images must be greater than 0.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the images were
+                      created. Required.
+                    "data": [
+                        {
+                            "b64_json": "str",  # The base64-encoded JSON of the
+                              generated image. Required.
+                            "revised_prompt": "str"  # Optional. The optimized prompt
+                              that was used to generate the image.
+                        }
+                    ],
+                    "background": "str",  # Optional. The background setting used for the image
+                      generation.
+                    "output_format": "str",  # Optional. The output format of the generated
+                      image.
+                    "quality": "str",  # Optional. The quality setting used for the image
+                      generation.
+                    "size": "str",  # Optional. The size of the generated image.
+                    "usage": {
+                        "input_tokens": 0,  # The number of tokens (images and text) in the
+                          input prompt. Required.
+                        "input_tokens_details": {
+                            "image_tokens": 0,  # The number of image tokens in the input
+                              prompt. Required.
+                            "text_tokens": 0  # The number of text tokens in the input
+                              prompt. Required.
+                        },
+                        "output_tokens": 0,  # The number of image tokens in the output
+                          image. Required.
+                        "total_tokens": 0  # The total number of tokens (images and text)
+                          used for the image generation. Required.
+                    }
+                }
+        """
+
+    @overload
+    def create_image(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        """Generate images from text prompts.
+
+        Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation
+        model with automatic prompt optimization and enhanced visual capabilities.
+
+        :param body: Image generation request supplied as a binary stream. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the images were
+                      created. Required.
+                    "data": [
+                        {
+                            "b64_json": "str",  # The base64-encoded JSON of the
+                              generated image. Required.
+                            "revised_prompt": "str"  # Optional. The optimized prompt
+                              that was used to generate the image.
+                        }
+                    ],
+                    "background": "str",  # Optional. The background setting used for the image
+                      generation.
+                    "output_format": "str",  # Optional. The output format of the generated
+                      image.
+                    "quality": "str",  # Optional. The quality setting used for the image
+                      generation.
+                    "size": "str",  # Optional. The size of the generated image.
+                    "usage": {
+                        "input_tokens": 0,  # The number of tokens (images and text) in the
+                          input prompt. Required.
+                        "input_tokens_details": {
+                            "image_tokens": 0,  # The number of image tokens in the input
+                              prompt. Required.
+                            "text_tokens": 0  # The number of text tokens in the input
+                              prompt. Required.
+                        },
+                        "output_tokens": 0,  # The number of image tokens in the output
+                          image. Required.
+                        "total_tokens": 0  # The total number of tokens (images and text)
+                          used for the image generation. Required.
+                    }
+                }
+        """
+
+    @distributed_trace
+    def create_image(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate images from text prompts.
+
+        Creates a high-quality image from a text prompt using GPT-IMAGE-1, the latest image generation
+        model with automatic prompt optimization and enhanced visual capabilities.
+
+        :param body: Is either a JSON type or a IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "model": "str",  # The model to use for image generation. Required.
+                    "n": 0,  # The number of images to generate. Must be between 1 and 10.
+                      Required.
+                    "prompt": "str",  # A text description of the desired image(s). Supports up
+                      to 32,000 characters and provides automatic prompt optimization for best results.
+                      Required.
+                    "background": "str",  # Optional. The background setting for the image
+                      generation. Supported values: transparent, opaque, auto.
+                    "moderation": "str",  # Optional. The moderation setting for the image
+                      generation. Supported values: low, auto.
+                    "output_compression": 0,  # Optional. The output compression level for the
+                      image generation (0-100).
+                    "output_format": "str",  # Optional. The output format for the image
+                      generation. Supported values: png, webp, jpeg.
+                    "partial_images": 0,  # Optional. The number of partial image chunks to
+                      return during streaming generation. Defaults to 0. When stream=true, this must be
+                      greater than 0 to receive progressive updates of the image as it is being
+                      generated.
+                    "quality": "str",  # Optional. The quality of the image that will be
+                      generated. Supported values: auto, high, medium, low.
+                    "size": "str",  # Optional. The size of the generated images. GPT-IMAGE-1
+                      supports: auto (automatically select best size), 1536x1024 (landscape), 1024x1536
+                      (portrait). Known values are: "auto", "1536x1024", and "1024x1536".
+                    "stream": False,  # Optional. Default value is False. If set to true, partial
+                      image data will be streamed as the image is being generated. The response will be
+                      sent as server-sent events with partial image chunks. When stream is true,
+                      partial_images must be greater than 0.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the images were
+                      created. Required.
+                    "data": [
+                        {
+                            "b64_json": "str",  # The base64-encoded JSON of the
+                              generated image. Required.
+                            "revised_prompt": "str"  # Optional. The optimized prompt
+                              that was used to generate the image.
+                        }
+                    ],
+                    "background": "str",  # Optional. The background setting used for the image
+                      generation.
+                    "output_format": "str",  # Optional. The output format of the generated
+                      image.
+                    "quality": "str",  # Optional. The quality setting used for the image
+                      generation.
+                    "size": "str",  # Optional. The size of the generated image.
+                    "usage": {
+                        "input_tokens": 0,  # The number of tokens (images and text) in the
+                          input prompt. Required.
+                        "input_tokens_details": {
+                            "image_tokens": 0,  # The number of image tokens in the input
+                              prompt. Required.
+                            "text_tokens": 0  # The number of text tokens in the input
+                              prompt. Required.
+                        },
+                        "output_tokens": 0,  # The number of image tokens in the output
+                          image. Required.
+                        "total_tokens": 0  # The total number of tokens (images and text)
+                          used for the image generation. Required.
+                    }
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop(
+            "content_type", _headers.pop("Content-Type", None)
+        )
+        cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        content_type = content_type or "application/json"
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):
+            _content = body
+        else:
+            _json = body
+
+        _request = build_inference_create_image_request(
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False
+        pipeline_response: PipelineResponse = (
+            self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:
+            if _stream:
+                response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)
+
+        response_headers = {}
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:
+            deserialized = response.json()
+        else:
+            deserialized = None
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+    @distributed_trace
+    def list_models(self, **kwargs: Any) -> JSON:
+        """List available models.
+
+        Lists the currently available models, and provides basic information about each one such as the
+        owner and availability.
+
+        Keyword arguments not listed below are forwarded unchanged to the pipeline run.
+
+        :keyword error_map: Optional mapping of HTTP status code to exception type. Its entries
+         are merged over the defaults defined in this method (304, 401, 404, 409, 429 and 500).
+        :keyword headers: Optional headers mapping handed to the request builder.
+        :keyword params: Optional params mapping handed to the request builder.
+        :keyword cls: Optional callable invoked as ``cls(pipeline_response, deserialized,
+         response_headers)``, where ``response_headers`` is a dict of the ``ratelimit-limit``,
+         ``ratelimit-remaining`` and ``ratelimit-reset`` values. When given, its return value is
+         returned instead of the deserialized body.
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "data": [
+                        {
+                            "created": 0,  # The Unix timestamp (in seconds) when the
+                              model was created. Required.
+                            "id": "str",  # The model identifier, which can be referenced
+                              in the API endpoints. Required.
+                            "object": "str",  # The object type, which is always "model".
+                              Required. "model"
+                            "owned_by": "str"  # The organization that owns the model.
+                              Required.
+                        }
+                    ],
+                    "object": "str"  # The object type, which is always "list". Required. "list"
+                }
+        """
+        status_errors: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        status_errors.update(kwargs.pop("error_map", {}) or {})
+
+        # A falsy ``headers``/``params`` value is treated the same as an omitted one.
+        extra_headers = kwargs.pop("headers", {}) or {}
+        extra_params = kwargs.pop("params", {}) or {}
+        custom_cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        request = build_inference_list_models_request(
+            headers=extra_headers, params=extra_params
+        )
+        request.url = self._client.format_url(request.url)
+
+        # The response is never streamed: the pipeline is always run with stream=False.
+        pipeline_response: PipelineResponse = (
+            self._client._pipeline.run(  # pylint: disable=protected-access
+                request, stream=False, **kwargs
+            )
+        )
+        http_response = pipeline_response.http_response
+
+        if http_response.status_code != 200:
+            # Prefer the exception registered for this status; otherwise fall back to the generic one.
+            map_error(status_code=http_response.status_code, response=http_response, error_map=status_errors)  # type: ignore
+            raise HttpResponseError(response=http_response)
+
+        # Collected for the optional ``cls`` callback; each value goes through the "int" deserializer.
+        rate_limit_headers = {
+            header: self._deserialize("int", http_response.headers.get(header))
+            for header in ("ratelimit-limit", "ratelimit-remaining", "ratelimit-reset")
+        }
+
+        # An empty body is reported as None instead of being parsed as JSON.
+        payload = http_response.json() if http_response.content else None
+
+        if custom_cls:
+            return custom_cls(pipeline_response, cast(JSON, payload), rate_limit_headers)  # type: ignore
+
+        return cast(JSON, payload)  # type: ignore
+
+    @overload
+    def create_response(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Send Prompt to a Model Using the Responses API.
+
+        Generate text responses from text prompts. This endpoint supports both streaming and
+        non-streaming responses for supported text models.
+
+        :param body: Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {},
+                    "model": "str",  # The model ID of the model you want to use. Get the model
+                      ID using ``/v1/models`` or on the available models page. Required.
+                    "instructions": "str",  # Optional. System-level instructions for the model.
+                      This sets the behavior and context for the response generation.
+                    "max_output_tokens": 0,  # Optional. The maximum number of tokens to generate
+                      in the response.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of key-value pairs that can be attached
+                          to the request.
+                    },
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. Set to true to stream
+                      partial responses as Server-Sent Events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data: [DONE] message with token usage statistics for
+                          the entire request.
+                    },
+                    "temperature": 0.0,  # Optional. A value between 0.0 and 2.0 to control
+                      randomness and creativity. Lower values like 0.2 make the output more focused and
+                      deterministic, while higher values like 0.8 make it more random.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function to be
+                              called.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts, described as a JSON Schema object.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass.
+                    "user": "str"  # Optional. A unique identifier representing your end-user.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the response was
+                      created. Required.
+                    "id": "str",  # A unique identifier for the response. Required.
+                    "model": "str",  # The model used to generate the response. Required.
+                    "object": "str",  # The object type, which is always ``response``. Required.
+                      "response"
+                    "output": [
+                        {
+                            "content": [
+                                {
+                                    "text": "str",  # The text content. Required.
+                                    "type": "str"  # The type of content part.
+                                      ``reasoning_text`` for reasoning content, ``output_text`` for
+                                      final output text. Required. Known values are: "reasoning_text"
+                                      and "output_text".
+                                }
+                            ],
+                            "type": "str",  # The type of output item. One of
+                              ``reasoning``, ``message``, or ``function_call``. Required. Known
+                              values are: "reasoning", "message", and "function_call".
+                            "arguments": "str",  # Optional. JSON string of function
+                              arguments (present when type is ``function_call``).
+                            "call_id": "str",  # Optional. The unique ID of the function
+                              tool call (present when type is ``function_call``).
+                            "id": "str",  # Optional. The unique ID of the output item.
+                            "name": "str",  # Optional. The name of the function to call
+                              (present when type is ``function_call``).
+                            "role": "str",  # Optional. The role associated with this
+                              output item (typically ``assistant``).
+                            "status": "str"  # Optional. Status of the item.
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 0,  # The number of input tokens. Required.
+                        "input_tokens_details": {
+                            "cached_tokens": 0  # The number of tokens that were
+                              retrieved from the cache. Required.
+                        },
+                        "output_tokens": 0,  # The number of output tokens. Required.
+                        "output_tokens_details": {
+                            "reasoning_tokens": 0,  # The number of reasoning tokens.
+                              Required.
+                            "tool_output_tokens": 0  # The number of tool output tokens.
+                              Required.
+                        },
+                        "total_tokens": 0  # The total number of tokens used. Required.
+                    },
+                    "max_output_tokens": 0,  # Optional. Maximum output tokens setting.
+                    "parallel_tool_calls": bool,  # Optional. Whether parallel tool calls are
+                      enabled.
+                    "status": "str",  # Optional. Status of the response.
+                    "temperature": 0.0,  # Optional. Temperature setting used for the response.
+                    "tool_choice": "str",  # Optional. Tool choice setting used for the response.
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. Top-p setting used for the response.
+                    "user": "str"  # Optional. User identifier.
+                }
+        """
+
+    @overload
+    def create_response(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Send Prompt to a Model Using the Responses API.
+
+        Generate text responses from text prompts. This endpoint supports both streaming and
+        non-streaming responses for supported text models.
+
+        :param body: Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the response was
+                      created. Required.
+                    "id": "str",  # A unique identifier for the response. Required.
+                    "model": "str",  # The model used to generate the response. Required.
+                    "object": "str",  # The object type, which is always ``response``. Required.
+                      "response"
+                    "output": [
+                        {
+                            "content": [
+                                {
+                                    "text": "str",  # The text content. Required.
+                                    "type": "str"  # The type of content part.
+                                      ``reasoning_text`` for reasoning content, ``output_text`` for
+                                      final output text. Required. Known values are: "reasoning_text"
+                                      and "output_text".
+                                }
+                            ],
+                            "type": "str",  # The type of output item. One of
+                              ``reasoning``, ``message``, or ``function_call``. Required. Known
+                              values are: "reasoning", "message", and "function_call".
+                            "arguments": "str",  # Optional. JSON string of function
+                              arguments (present when type is ``function_call``).
+                            "call_id": "str",  # Optional. The unique ID of the function
+                              tool call (present when type is ``function_call``).
+                            "id": "str",  # Optional. The unique ID of the output item.
+                            "name": "str",  # Optional. The name of the function to call
+                              (present when type is ``function_call``).
+                            "role": "str",  # Optional. The role associated with this
+                              output item (typically ``assistant``).
+                            "status": "str"  # Optional. Status of the item.
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 0,  # The number of input tokens. Required.
+                        "input_tokens_details": {
+                            "cached_tokens": 0  # The number of tokens that were
+                              retrieved from the cache. Required.
+                        },
+                        "output_tokens": 0,  # The number of output tokens. Required.
+                        "output_tokens_details": {
+                            "reasoning_tokens": 0,  # The number of reasoning tokens.
+                              Required.
+                            "tool_output_tokens": 0  # The number of tool output tokens.
+                              Required.
+                        },
+                        "total_tokens": 0  # The total number of tokens used. Required.
+                    },
+                    "max_output_tokens": 0,  # Optional. Maximum output tokens setting.
+                    "parallel_tool_calls": bool,  # Optional. Whether parallel tool calls are
+                      enabled.
+                    "status": "str",  # Optional. Status of the response.
+                    "temperature": 0.0,  # Optional. Temperature setting used for the response.
+                    "tool_choice": "str",  # Optional. Tool choice setting used for the response.
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. Top-p setting used for the response.
+                    "user": "str"  # Optional. User identifier.
+                }
+        """
+
+    @distributed_trace
+    def create_response(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON:
+        # pylint: disable=line-too-long
+        """Send Prompt to a Model Using the Responses API.
+
+        Generate text responses from text prompts. This endpoint supports both streaming and
+        non-streaming responses for supported text models.
+
+        Note that this method itself always runs the pipeline with ``stream=False`` and decodes the
+        complete response body as JSON.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword content_type: Body Parameter content-type. Falls back to a ``Content-Type`` entry in
+         ``headers``, then to "application/json".
+        :keyword error_map: Optional mapping of HTTP status code to exception type. Its entries
+         are merged over the defaults defined in this method (304, 401, 404, 409, 429 and 500).
+        :keyword headers: Optional headers mapping handed to the request builder.
+        :keyword params: Optional params mapping handed to the request builder.
+        :keyword cls: Optional callable invoked as ``cls(pipeline_response, deserialized,
+         response_headers)``, where ``response_headers`` is a dict of the ``ratelimit-*`` values.
+         When given, its return value is returned instead of the deserialized body.
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {},
+                    "model": "str",  # The model ID of the model you want to use. Get the model
+                      ID using ``/v1/models`` or on the available models page. Required.
+                    "instructions": "str",  # Optional. System-level instructions for the model.
+                      This sets the behavior and context for the response generation.
+                    "max_output_tokens": 0,  # Optional. The maximum number of tokens to generate
+                      in the response.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of key-value pairs that can be attached
+                          to the request.
+                    },
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. Set to true to stream
+                      partial responses as Server-Sent Events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data: [DONE] message with token usage statistics for
+                          the entire request.
+                    },
+                    "temperature": 0.0,  # Optional. A value between 0.0 and 2.0 to control
+                      randomness and creativity. Lower values like 0.2 make the output more focused and
+                      deterministic, while higher values like 0.8 make it more random.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function to be
+                              called.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts, described as a JSON Schema object.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass.
+                    "user": "str"  # Optional. A unique identifier representing your end-user.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "created": 0,  # The Unix timestamp (in seconds) of when the response was
+                      created. Required.
+                    "id": "str",  # A unique identifier for the response. Required.
+                    "model": "str",  # The model used to generate the response. Required.
+                    "object": "str",  # The object type, which is always ``response``. Required.
+                      "response"
+                    "output": [
+                        {
+                            "content": [
+                                {
+                                    "text": "str",  # The text content. Required.
+                                    "type": "str"  # The type of content part.
+                                      ``reasoning_text`` for reasoning content, ``output_text`` for
+                                      final output text. Required. Known values are: "reasoning_text"
+                                      and "output_text".
+                                }
+                            ],
+                            "type": "str",  # The type of output item. One of
+                              ``reasoning``, ``message``, or ``function_call``. Required. Known
+                              values are: "reasoning", "message", and "function_call".
+                            "arguments": "str",  # Optional. JSON string of function
+                              arguments (present when type is ``function_call``).
+                            "call_id": "str",  # Optional. The unique ID of the function
+                              tool call (present when type is ``function_call``).
+                            "id": "str",  # Optional. The unique ID of the output item.
+                            "name": "str",  # Optional. The name of the function to call
+                              (present when type is ``function_call``).
+                            "role": "str",  # Optional. The role associated with this
+                              output item (typically ``assistant``).
+                            "status": "str"  # Optional. Status of the item.
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 0,  # The number of input tokens. Required.
+                        "input_tokens_details": {
+                            "cached_tokens": 0  # The number of tokens that were
+                              retrieved from the cache. Required.
+                        },
+                        "output_tokens": 0,  # The number of output tokens. Required.
+                        "output_tokens_details": {
+                            "reasoning_tokens": 0,  # The number of reasoning tokens.
+                              Required.
+                            "tool_output_tokens": 0  # The number of tool output tokens.
+                              Required.
+                        },
+                        "total_tokens": 0  # The total number of tokens used. Required.
+                    },
+                    "max_output_tokens": 0,  # Optional. Maximum output tokens setting.
+                    "parallel_tool_calls": bool,  # Optional. Whether parallel tool calls are
+                      enabled.
+                    "status": "str",  # Optional. Status of the response.
+                    "temperature": 0.0,  # Optional. Temperature setting used for the response.
+                    "tool_choice": "str",  # Optional. Tool choice setting used for the response.
+                    "tools": [
+                        {
+                            "type": "str",  # The type of the tool. Required. "function"
+                            "description": "str",  # Optional. A description of what the
+                              function does.
+                            "name": "str",  # Optional. The name of the function.
+                            "parameters": {
+                                "str": {}  # Optional. The parameters the function
+                                  accepts.
+                            }
+                        }
+                    ],
+                    "top_p": 0.0,  # Optional. Top-p setting used for the response.
+                    "user": "str"  # Optional. User identifier.
+                }
+        """
+        status_errors: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        status_errors.update(kwargs.pop("error_map", {}) or {})
+
+        extra_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        extra_params = kwargs.pop("params", {}) or {}
+
+        # An explicit ``content_type`` keyword wins over a ``Content-Type`` header; the header is
+        # popped from ``extra_headers`` in either case.
+        content_type: Optional[str] = (
+            kwargs.pop("content_type", extra_headers.pop("Content-Type", None))
+            or "application/json"
+        )
+        custom_cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        # File-like objects and raw bytes go out as ``content``; anything else as ``json``.
+        is_raw = isinstance(body, (IOBase, bytes))
+        request = build_inference_create_response_request(
+            content_type=content_type,
+            json=None if is_raw else body,
+            content=body if is_raw else None,
+            headers=extra_headers,
+            params=extra_params,
+        )
+        request.url = self._client.format_url(request.url)
+
+        # The response is never streamed: the pipeline is always run with stream=False.
+        pipeline_response: PipelineResponse = (
+            self._client._pipeline.run(  # pylint: disable=protected-access
+                request, stream=False, **kwargs
+            )
+        )
+        http_response = pipeline_response.http_response
+
+        if http_response.status_code != 200:
+            # Prefer the exception registered for this status; otherwise fall back to the generic one.
+            map_error(status_code=http_response.status_code, response=http_response, error_map=status_errors)  # type: ignore
+            raise HttpResponseError(response=http_response)
+
+        # Collected for the optional ``cls`` callback; each value goes through the "int" deserializer.
+        rate_limit_headers = {
+            header: self._deserialize("int", http_response.headers.get(header))
+            for header in ("ratelimit-limit", "ratelimit-remaining", "ratelimit-reset")
+        }
+
+        # An empty body is reported as None instead of being parsed as JSON.
+        payload = http_response.json() if http_response.content else None
+
+        if custom_cls:
+            return custom_cls(pipeline_response, cast(JSON, payload), rate_limit_headers)  # type: ignore
+
+        return cast(JSON, payload)  # type: ignore
+
+    @overload
+    def create_async_invoke(
+        self, body: JSON, *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate Image, Audio, or Text-to-Speech Using fal Models.
+
+        Starts an asynchronous generation job (image, audio, or text-to-speech) on a fal model
+        and returns a request_id. The job status is QUEUED initially. Use the request_id to poll
+        for the result.
+
+        :param body: Invocation request as a JSON object (see the input template below). Required.
+        :type body: JSON
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object (see the response template below).
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: when the response status is not 202.
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {
+                        "enable_safety_checker": bool,  # Optional. Whether to enable the
+                          safety checker for generated content.
+                        "guidance_scale": 0.0,  # Optional. Controls how closely the image
+                          generation model follows the prompt. Higher values produce output more
+                          closely matching the prompt.
+                        "num_images": 0,  # Optional. The number of images to generate.
+                        "num_inference_steps": 0,  # Optional. The number of inference steps
+                          to use during image generation. More steps generally produce higher quality
+                          output but take longer.
+                        "output_format": "str",  # Optional. The desired output format or
+                          aspect ratio for image generation.
+                        "prompt": "str",  # Optional. The text prompt describing the desired
+                          output. Used for image generation and audio generation models.
+                        "seconds_total": 0,  # Optional. The total duration in seconds for
+                          generated audio. Used for audio generation models.
+                        "text": "str"  # Optional. The text content to convert to speech.
+                          Used for text-to-speech models.
+                    },
+                    "model_id": "str",  # The ID of the model to invoke asynchronously. Required.
+                    "tags": [
+                        {
+                            "key": "str",  # The tag key. Required.
+                            "value": "str"  # The tag value. Required.
+                        }
+                    ]
+                }
+
+                # response body for status code(s): 202
+                response == {
+                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
+                      created. Required.
+                    "model_id": "str",  # The model ID that was invoked. Required.
+                    "request_id": "str",  # A unique identifier for the async invocation request.
+                      Use this ID to check the status and retrieve the result. Required.
+                    "status": "str",  # The current status of the async invocation. Required.
+                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
+                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
+                      job completed. Null until finished.
+                    "error": "str",  # Optional. Error message if the job failed. Null on
+                      success.
+                    "output": {
+                        "str": {}  # Optional. The output of the invocation. Null while the
+                          job is queued or in progress. Contains the result once completed.
+                    },
+                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
+                      started processing. Null while queued.
+                }
+        """
+
+    @overload
+    def create_async_invoke(
+        self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate Image, Audio, or Text-to-Speech Using fal Models.
+
+        Starts an asynchronous generation job (image, audio, or text-to-speech) on a fal model
+        and returns a request_id. The job status is QUEUED initially. Use the request_id to poll
+        for the result.
+
+        :param body: Invocation request as a bytes stream, sent as the raw request content. Required.
+        :type body: IO[bytes]
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object (see the response template below).
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: when the response status is not 202.
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 202
+                response == {
+                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
+                      created. Required.
+                    "model_id": "str",  # The model ID that was invoked. Required.
+                    "request_id": "str",  # A unique identifier for the async invocation request.
+                      Use this ID to check the status and retrieve the result. Required.
+                    "status": "str",  # The current status of the async invocation. Required.
+                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
+                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
+                      job completed. Null until finished.
+                    "error": "str",  # Optional. Error message if the job failed. Null on
+                      success.
+                    "output": {
+                        "str": {}  # Optional. The output of the invocation. Null while the
+                          job is queued or in progress. Contains the result once completed.
+                    },
+                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
+                      started processing. Null while queued.
+                }
+        """
+
+    @distributed_trace
+    def create_async_invoke(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON:
+        # pylint: disable=line-too-long
+        """Generate Image, Audio, or Text-to-Speech Using fal Models.
+
+        Submits an asynchronous generation job to a fal model and returns the job record, which
+        carries a request_id and an initial status of QUEUED. Poll with that request_id to fetch
+        the result. A response status other than 202 is raised as an error.
+
+        :param body: JSON object, or a bytes stream that is sent as the raw request content. Required.
+        :type body: JSON or IO[bytes]
+        :return: JSON object, or the result of the ``cls`` callback when one is passed.
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "input": {
+                        "enable_safety_checker": bool,  # Optional. Whether to enable the
+                          safety checker for generated content.
+                        "guidance_scale": 0.0,  # Optional. Controls how closely the image
+                          generation model follows the prompt. Higher values produce output more
+                          closely matching the prompt.
+                        "num_images": 0,  # Optional. The number of images to generate.
+                        "num_inference_steps": 0,  # Optional. The number of inference steps
+                          to use during image generation. More steps generally produce higher quality
+                          output but take longer.
+                        "output_format": "str",  # Optional. The desired output format or
+                          aspect ratio for image generation.
+                        "prompt": "str",  # Optional. The text prompt describing the desired
+                          output. Used for image generation and audio generation models.
+                        "seconds_total": 0,  # Optional. The total duration in seconds for
+                          generated audio. Used for audio generation models.
+                        "text": "str"  # Optional. The text content to convert to speech.
+                          Used for text-to-speech models.
+                    },
+                    "model_id": "str",  # The ID of the model to invoke asynchronously. Required.
+                    "tags": [
+                        {
+                            "key": "str",  # The tag key. Required.
+                            "value": "str"  # The tag value. Required.
+                        }
+                    ]
+                }
+
+                # response body for status code(s): 202
+                response == {
+                    "created_at": "2020-02-20 00:00:00",  # The timestamp when the request was
+                      created. Required.
+                    "model_id": "str",  # The model ID that was invoked. Required.
+                    "request_id": "str",  # A unique identifier for the async invocation request.
+                      Use this ID to check the status and retrieve the result. Required.
+                    "status": "str",  # The current status of the async invocation. Required.
+                      Known values are: "QUEUED", "IN_PROGRESS", "COMPLETED", and "FAILED".
+                    "completed_at": "2020-02-20 00:00:00",  # Optional. The timestamp when the
+                      job completed. Null until finished.
+                    "error": "str",  # Optional. Error message if the job failed. Null on
+                      success.
+                    "output": {
+                        "str": {}  # Optional. The output of the invocation. Null while the
+                          job is queued or in progress. Contains the result once completed.
+                    },
+                    "started_at": "2020-02-20 00:00:00"  # Optional. The timestamp when the job
+                      started processing. Null while queued.
+                }
+        """
+        # Default status-code -> exception mapping handed to map_error below.
+        status_errors: MutableMapping[int, Type[HttpResponseError]] = {
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        # Caller-supplied entries override the defaults.
+        status_errors.update(kwargs.pop("error_map", {}) or {})
+
+        request_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        query_params = kwargs.pop("params", {}) or {}
+
+        # An explicit keyword wins over a Content-Type header; the header is consumed either way.
+        header_content_type = request_headers.pop("Content-Type", None)
+        content_type: Optional[str] = kwargs.pop("content_type", header_content_type)
+        if not content_type:
+            content_type = "application/json"
+        cls: ClsType[JSON] = kwargs.pop("cls", None)
+
+        # Raw bytes/streams are passed as ``content``; anything else is passed as ``json``.
+        is_raw_body = isinstance(body, (IOBase, bytes))
+        request = build_inference_create_async_invoke_request(
+            content_type=content_type,
+            json=None if is_raw_body else body,
+            content=body if is_raw_body else None,
+            headers=request_headers,
+            params=query_params,
+        )
+        request.url = self._client.format_url(request.url)
+
+        stream_response = False
+        pipeline_response: PipelineResponse = (
+            self._client._pipeline.run(  # pylint: disable=protected-access
+                request, stream=stream_response, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code != 202:
+            if stream_response:
+                response.read()  # Load the body in memory and close the socket
+            map_error(  # type: ignore
+                status_code=response.status_code,
+                response=response,
+                error_map=status_errors,
+            )
+            raise HttpResponseError(response=response)
+
+        # Rate-limit headers are run through the deserializer as "int" and handed to ``cls``.
+        rate_limit_names = (
+            "ratelimit-limit",
+            "ratelimit-remaining",
+            "ratelimit-reset",
+        )
+        response_headers = {
+            name: self._deserialize("int", response.headers.get(name))
+            for name in rate_limit_names
+        }
+
+        # A response without content yields None instead of attempting a JSON decode.
+        deserialized = response.json() if response.content else None
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore
+
+
+class AgentInferenceOperations:
+    """
+    .. warning::
+        **DO NOT** instantiate this class directly.
+
+        Instead, you should access the following operations through
+        :class:`~pydo.GeneratedClient`'s
+        :attr:`agent_inference` attribute.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Bind the client, config, serializer and deserializer (positional first, else keyword)."""
+        remaining = list(args)
+        for attribute, keyword in (
+            ("_client", "client"), ("_config", "config"),
+            ("_serialize", "serializer"), ("_deserialize", "deserializer"),
+        ):
+            setattr(self, attribute, remaining.pop(0) if remaining else kwargs.pop(keyword))
+
+    @overload
+    def create_chat_completion(
+        self,
+        body: JSON,
+        *,
+        agent: bool = True,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Creates a model response for the given chat conversation via a customer-provisioned
+        agent endpoint.
+
+        :param body: Chat completion request as a JSON object (see the input template below). Required.
+        :type body: JSON
+        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
+        :paramtype agent: bool
+        :keyword content_type: Body Parameter content-type. Content type parameter for JSON body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object (see the response template below).
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError:
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @overload
+    def create_chat_completion(
+        self,
+        body: IO[bytes],
+        *,
+        agent: bool = True,
+        content_type: str = "application/json",
+        **kwargs: Any,
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Creates a model response for the given chat conversation via a customer-provisioned
+        agent endpoint.
+
+        :param body: The request body, supplied as a binary stream. Required.
+        :type body: IO[bytes]
+        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
+        :paramtype agent: bool
+        :keyword content_type: Body Parameter content-type. Content type parameter for binary body.
+         Default value is "application/json".
+        :paramtype content_type: str
+        :return: JSON object
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: If the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+
+    @distributed_trace
+    def create_chat_completion(
+        self, body: Union[JSON, IO[bytes]], *, agent: bool = True, **kwargs: Any
+    ) -> JSON:
+        # pylint: disable=line-too-long
+        """Create a model response for the given chat conversation.
+
+        Creates a model response for the given chat conversation via a customer-provisioned
+        agent endpoint.
+
+        :param body: Is either a JSON type or an IO[bytes] type. Required.
+        :type body: JSON or IO[bytes]
+        :keyword agent: Must be set to true for agent-based completion behavior. Default value is True.
+        :paramtype agent: bool
+        :return: JSON object (None if the response body is empty), or the result of a caller-supplied ``cls`` callback
+        :rtype: JSON
+        :raises ~azure.core.exceptions.HttpResponseError: If the response status code is not 200.
+
+        Example:
+            .. code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "messages": [
+                        {
+                            "role": "str",  # The role of the message author. Required.
+                              Known values are: "system", "developer", "user", "assistant", and "tool".
+                            "content": "str",  # Optional. The contents of the message.
+                            "reasoning_content": "str",  # Optional. The reasoning
+                              content generated by the model (assistant messages only).
+                            "refusal": "str",  # Optional. The refusal message generated
+                              by the model (assistant messages only).
+                            "tool_call_id": "str",  # Optional. Tool call that this
+                              message is responding to (tool messages only).
+                            "tool_calls": [
+                                {
+                                    "function": {
+                                        "arguments": "str",  # The arguments
+                                          to call the function with, as generated by the model in JSON
+                                          format. Required.
+                                        "name": "str"  # The name of the
+                                          function to call. Required.
+                                    },
+                                    "id": "str",  # The ID of the tool call.
+                                      Required.
+                                    "type": "str"  # The type of the tool.
+                                      Currently, only function is supported. Required. "function"
+                                }
+                            ]
+                        }
+                    ],
+                    "model": "str",  # Model ID used to generate the response. Required.
+                    "frequency_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on their existing frequency in
+                      the text so far, decreasing the model's likelihood to repeat the same line
+                      verbatim.
+                    "logit_bias": {
+                        "str": 0  # Optional. Modify the likelihood of specified tokens
+                          appearing in the completion. Accepts a JSON object that maps tokens
+                          (specified by their token ID in the tokenizer) to an associated bias value
+                          from -100 to 100. Mathematically, the bias is added to the logits generated
+                          by the model prior to sampling. The exact effect will vary per model, but
+                          values between -1 and 1 should decrease or increase likelihood of selection;
+                          values like -100 or 100 should result in a ban or exclusive selection of the
+                          relevant token.
+                    },
+                    "logprobs": False,  # Optional. Default value is False. Whether to return log
+                      probabilities of the output tokens or not. If true, returns the log probabilities
+                      of each output token returned in the content of message.
+                    "max_completion_tokens": 0,  # Optional. The maximum number of completion
+                      tokens that may be used over the course of the run. The run will make a best
+                      effort to use only the number of completion tokens specified, across multiple
+                      turns of the run.
+                    "max_tokens": 0,  # Optional. The maximum number of tokens that can be
+                      generated in the completion. The token count of your prompt plus max_tokens
+                      cannot exceed the model's context length.
+                    "metadata": {
+                        "str": "str"  # Optional. Set of 16 key-value pairs that can be
+                          attached to an object. This can be useful for storing additional information
+                          about the object in a structured format. Keys are strings with a maximum
+                          length of 64 characters. Values are strings with a maximum length of 512
+                          characters.
+                    },
+                    "n": 1,  # Optional. Default value is 1. How many chat completion choices to
+                      generate for each input message. Note that you will be charged based on the
+                      number of generated tokens across all of the choices. Keep n as 1 to minimize
+                      costs.
+                    "presence_penalty": 0,  # Optional. Default value is 0. Number between -2.0
+                      and 2.0. Positive values penalize new tokens based on whether they appear in the
+                      text so far, increasing the model's likelihood to talk about new topics.
+                    "reasoning_effort": "str",  # Optional. Constrains effort on reasoning for
+                      reasoning models. Reducing reasoning effort can result in faster responses and
+                      fewer tokens used on reasoning in a response. Known values are: "none",
+                      "minimal", "low", "medium", "high", and "xhigh".
+                    "seed": 0,  # Optional. If specified, the system will make a best effort to
+                      sample deterministically, such that repeated requests with the same seed and
+                      parameters should return the same result. Determinism is not guaranteed.
+                    "stop": {},
+                    "stream": False,  # Optional. Default value is False. If set to true, the
+                      model response data will be streamed to the client as it is generated using
+                      server-sent events.
+                    "stream_options": {
+                        "include_usage": bool  # Optional. If set, an additional chunk will
+                          be streamed before the data [DONE] message. The usage field on this chunk
+                          shows the token usage statistics for the entire request, and the choices
+                          field will always be an empty array.
+                    },
+                    "temperature": 0.0,  # Optional. What sampling temperature to use, between 0
+                      and 2. Higher values like 0.8 will make the output more random, while lower
+                      values like 0.2 will make it more focused and deterministic. We generally
+                      recommend altering this or top_p but not both.
+                    "tool_choice": {},
+                    "tools": [
+                        {
+                            "function": {
+                                "name": "str",  # The name of the function to be
+                                  called. Must be a-z, A-Z, 0-9, or contain underscores and dashes,
+                                  with a maximum length of 64. Required.
+                                "description": "str",  # Optional. A description of
+                                  what the function does, used by the model to choose when and how to
+                                  call the function.
+                                "parameters": {
+                                    "str": {}  # Optional. The parameters the
+                                      function accepts, described as a JSON Schema object.
+                                }
+                            },
+                            "type": "str"  # The type of the tool. Currently, only
+                              function is supported. Required. "function"
+                        }
+                    ],
+                    "top_logprobs": 0,  # Optional. An integer between 0 and 20 specifying the
+                      number of most likely tokens to return at each token position, each with an
+                      associated log probability. logprobs must be set to true if this parameter is
+                      used.
+                    "top_p": 0.0,  # Optional. An alternative to sampling with temperature,
+                      called nucleus sampling, where the model considers the results of the tokens with
+                      top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+                      probability mass are considered. We generally recommend altering this or
+                      temperature but not both.
+                    "user": "str"  # Optional. A unique identifier representing your end-user,
+                      which can help DigitalOcean to monitor and detect abuse.
+                }
+
+                # response body for status code(s): 200
+                response == {
+                    "choices": [
+                        {
+                            "finish_reason": "str",  # The reason the model stopped
+                              generating tokens. stop if the model hit a natural stop point or a
+                              provided stop sequence, length if the maximum number of tokens specified
+                              in the request was reached, tool_calls if the model called a tool.
+                              Required. Known values are: "stop", "length", "tool_calls", and
+                              "content_filter".
+                            "index": 0,  # The index of the choice in the list of
+                              choices. Required.
+                            "logprobs": {
+                                "content": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ],
+                                "refusal": [
+                                    {
+                                        "bytes": [
+                                            0  # A list of integers
+                                              representing the UTF-8 bytes representation of the token.
+                                              Can be null if there is no bytes representation for the
+                                              token. Required.
+                                        ],
+                                        "logprob": 0.0,  # The log
+                                          probability of this token, if it is within the top 20 most
+                                          likely tokens. Otherwise, the value -9999.0 is used to
+                                          signify that the token is very unlikely. Required.
+                                        "token": "str",  # The token.
+                                          Required.
+                                        "top_logprobs": [
+                                            {
+                                                "bytes": [
+                                                    0  #
+                                                      Required.
+                                                ],
+                                                "logprob": 0.0,  #
+                                                  The log probability of this token. Required.
+                                                "token": "str"  # The
+                                                  token. Required.
+                                            }
+                                        ]
+                                    }
+                                ]
+                            },
+                            "message": {
+                                "content": "str",  # The contents of the message.
+                                  Required.
+                                "reasoning_content": "str",  # The reasoning content
+                                  generated by the model. Required.
+                                "refusal": "str",  # The refusal message generated by
+                                  the model. Required.
+                                "role": "str",  # The role of the author of this
+                                  message. Required. "assistant"
+                                "tool_calls": [
+                                    {
+                                        "function": {
+                                            "arguments": "str",  # The
+                                              arguments to call the function with. Required.
+                                            "name": "str"  # The name of
+                                              the function to call. Required.
+                                        },
+                                        "id": "str",  # The ID of the tool
+                                          call. Required.
+                                        "type": "str"  # The type of the
+                                          tool. Required. "function"
+                                    }
+                                ]
+                            }
+                        }
+                    ],
+                    "created": 0,  # The Unix timestamp (in seconds) of when the chat completion
+                      was created. Required.
+                    "id": "str",  # A unique identifier for the chat completion. Required.
+                    "model": "str",  # The model used for the chat completion. Required.
+                    "object": "str",  # The object type, which is always chat.completion.
+                      Required. "chat.completion"
+                    "usage": {
+                        "cache_created_input_tokens": 0,  # Default value is 0. Number of
+                          prompt tokens written to cache. Required.
+                        "cache_creation": {
+                            "ephemeral_1h_input_tokens": 0,  # Default value is 0. Number
+                              of prompt tokens written to 1h cache. Required.
+                            "ephemeral_5m_input_tokens": 0  # Default value is 0. Number
+                              of prompt tokens written to 5m cache. Required.
+                        },
+                        "cache_read_input_tokens": 0,  # Default value is 0. Number of prompt
+                          tokens read from cache. Required.
+                        "completion_tokens": 0,  # Default value is 0. Number of tokens in
+                          the generated completion. Required.
+                        "prompt_tokens": 0,  # Default value is 0. Number of tokens in the
+                          prompt. Required.
+                        "total_tokens": 0  # Default value is 0. Total number of tokens used
+                          in the request (prompt + completion). Required.
+                    }
+                }
+        """
+        error_map: MutableMapping[int, Type[HttpResponseError]] = {  # status code -> exception handed to map_error below
+            404: ResourceNotFoundError,
+            409: ResourceExistsError,
+            304: ResourceNotModifiedError,
+            401: cast(
+                Type[HttpResponseError],
+                lambda response: ClientAuthenticationError(response=response),
+            ),
+            429: HttpResponseError,
+            500: HttpResponseError,
+        }
+        error_map.update(kwargs.pop("error_map", {}) or {})  # caller-supplied entries override the defaults above
+
+        _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
+        _params = kwargs.pop("params", {}) or {}
+
+        content_type: Optional[str] = kwargs.pop(
+            "content_type", _headers.pop("Content-Type", None)  # kwarg wins; the header value is the fallback
+        )
+        cls: ClsType[JSON] = kwargs.pop("cls", None)  # optional callback applied to the final result
+
+        content_type = content_type or "application/json"  # default when no content type was given
+        _json = None
+        _content = None
+        if isinstance(body, (IOBase, bytes)):  # streams/bytes are passed as `content`, anything else as `json`
+            _content = body
+        else:
+            _json = body
+
+        _request = build_agent_inference_create_chat_completion_request(
+            agent=agent,
+            content_type=content_type,
+            json=_json,
+            content=_content,
+            headers=_headers,
+            params=_params,
+        )
+        _request.url = self._client.format_url(_request.url)
+
+        _stream = False  # the response is never streamed, so the `if _stream` branch below is not taken
+        pipeline_response: PipelineResponse = (
+            self._client._pipeline.run(  # pylint: disable=protected-access
+                _request, stream=_stream, **kwargs
+            )
+        )
+
+        response = pipeline_response.http_response
+
+        if response.status_code not in [200]:  # any status other than 200 is treated as an error
+            if _stream:
+                response.read()  # Load the body in memory and close the socket
+            map_error(status_code=response.status_code, response=response, error_map=error_map)  # type: ignore
+            raise HttpResponseError(response=response)
+
+        response_headers = {}  # only handed to the `cls` callback; not part of the plain return value
+        response_headers["ratelimit-limit"] = self._deserialize(
+            "int", response.headers.get("ratelimit-limit")
+        )
+        response_headers["ratelimit-remaining"] = self._deserialize(
+            "int", response.headers.get("ratelimit-remaining")
+        )
+        response_headers["ratelimit-reset"] = self._deserialize(
+            "int", response.headers.get("ratelimit-reset")
+        )
+
+        if response.content:  # an empty body yields None instead of a parsed object
+            deserialized = response.json()  # NOTE(review): a body with "stream": True presumably returns server-sent events, which this would not parse -- confirm
+        else:
+            deserialized = None
+
+        if cls:
+            return cls(pipeline_response, cast(JSON, deserialized), response_headers)  # type: ignore
+
+        return cast(JSON, deserialized)  # type: ignore