From 856344383eceab34daf7ca7d070b71eb5f7396db Mon Sep 17 00:00:00 2001 From: API Engineering Date: Wed, 11 Mar 2026 06:34:52 +0000 Subject: [PATCH] [bot] Updated client based on openapi-1bd3add/clientgen --- DO_OPENAPI_COMMIT_SHA.txt | 2 +- src/pydo/_client.py | 6 + src/pydo/aio/_client.py | 6 + src/pydo/aio/operations/__init__.py | 2 + src/pydo/aio/operations/_operations.py | 2847 +++++++++++++++++++++ src/pydo/operations/__init__.py | 2 + src/pydo/operations/_operations.py | 3168 ++++++++++++++++++++++++ 7 files changed, 6032 insertions(+), 1 deletion(-) diff --git a/DO_OPENAPI_COMMIT_SHA.txt b/DO_OPENAPI_COMMIT_SHA.txt index 3e045b5c..dd085343 100644 --- a/DO_OPENAPI_COMMIT_SHA.txt +++ b/DO_OPENAPI_COMMIT_SHA.txt @@ -1 +1 @@ -cf0a60a +1bd3add diff --git a/src/pydo/_client.py b/src/pydo/_client.py index 0fe21400..59a62d9a 100644 --- a/src/pydo/_client.py +++ b/src/pydo/_client.py @@ -26,6 +26,7 @@ CdnOperations, CertificatesOperations, DatabasesOperations, + DedicatedInferencesOperations, DomainsOperations, DropletActionsOperations, DropletsOperations, @@ -597,6 +598,8 @@ class GeneratedClient: # pylint: disable=client-accepts-api-version-keyword,too :vartype billing_insights: pydo.operations.BillingInsightsOperations :ivar databases: DatabasesOperations operations :vartype databases: pydo.operations.DatabasesOperations + :ivar dedicated_inferences: DedicatedInferencesOperations operations + :vartype dedicated_inferences: pydo.operations.DedicatedInferencesOperations :ivar domains: DomainsOperations operations :vartype domains: pydo.operations.DomainsOperations :ivar droplets: DropletsOperations operations @@ -746,6 +749,9 @@ def __init__( self.databases = DatabasesOperations( self._client, self._config, self._serialize, self._deserialize ) + self.dedicated_inferences = DedicatedInferencesOperations( + self._client, self._config, self._serialize, self._deserialize + ) self.domains = DomainsOperations( self._client, self._config, self._serialize, self._deserialize 
) diff --git a/src/pydo/aio/_client.py b/src/pydo/aio/_client.py index 69354835..2316f3d5 100644 --- a/src/pydo/aio/_client.py +++ b/src/pydo/aio/_client.py @@ -26,6 +26,7 @@ CdnOperations, CertificatesOperations, DatabasesOperations, + DedicatedInferencesOperations, DomainsOperations, DropletActionsOperations, DropletsOperations, @@ -597,6 +598,8 @@ class GeneratedClient: # pylint: disable=client-accepts-api-version-keyword,too :vartype billing_insights: pydo.aio.operations.BillingInsightsOperations :ivar databases: DatabasesOperations operations :vartype databases: pydo.aio.operations.DatabasesOperations + :ivar dedicated_inferences: DedicatedInferencesOperations operations + :vartype dedicated_inferences: pydo.aio.operations.DedicatedInferencesOperations :ivar domains: DomainsOperations operations :vartype domains: pydo.aio.operations.DomainsOperations :ivar droplets: DropletsOperations operations @@ -746,6 +749,9 @@ def __init__( self.databases = DatabasesOperations( self._client, self._config, self._serialize, self._deserialize ) + self.dedicated_inferences = DedicatedInferencesOperations( + self._client, self._config, self._serialize, self._deserialize + ) self.domains = DomainsOperations( self._client, self._config, self._serialize, self._deserialize ) diff --git a/src/pydo/aio/operations/__init__.py b/src/pydo/aio/operations/__init__.py index 4a74b7ce..4325fa0f 100644 --- a/src/pydo/aio/operations/__init__.py +++ b/src/pydo/aio/operations/__init__.py @@ -17,6 +17,7 @@ from ._operations import InvoicesOperations from ._operations import BillingInsightsOperations from ._operations import DatabasesOperations +from ._operations import DedicatedInferencesOperations from ._operations import DomainsOperations from ._operations import DropletsOperations from ._operations import DropletActionsOperations @@ -70,6 +71,7 @@ "InvoicesOperations", "BillingInsightsOperations", "DatabasesOperations", + "DedicatedInferencesOperations", "DomainsOperations", 
"DropletsOperations", "DropletActionsOperations", diff --git a/src/pydo/aio/operations/_operations.py b/src/pydo/aio/operations/_operations.py index e68d3123..79e518c0 100644 --- a/src/pydo/aio/operations/_operations.py +++ b/src/pydo/aio/operations/_operations.py @@ -177,6 +177,19 @@ build_databases_update_region_request, build_databases_update_sql_mode_request, build_databases_update_user_request, + build_dedicated_inferences_create_request, + build_dedicated_inferences_create_tokens_request, + build_dedicated_inferences_delete_request, + build_dedicated_inferences_delete_tokens_request, + build_dedicated_inferences_get_accelerator_request, + build_dedicated_inferences_get_ca_request, + build_dedicated_inferences_get_gpu_model_config_request, + build_dedicated_inferences_get_request, + build_dedicated_inferences_list_accelerators_request, + build_dedicated_inferences_list_request, + build_dedicated_inferences_list_sizes_request, + build_dedicated_inferences_list_tokens_request, + build_dedicated_inferences_patch_request, build_domains_create_record_request, build_domains_create_request, build_domains_delete_record_request, @@ -121819,6 +121832,2840 @@ async def delete_opensearch_index( return deserialized # type: ignore +class DedicatedInferencesOperations: + """ + .. warning:: + **DO NOT** instantiate this class directly. + + Instead, you should access the following operations through + :class:`~pydo.aio.GeneratedClient`'s + :attr:`dedicated_inferences` attribute. 
+ """ + + def __init__(self, *args, **kwargs) -> None: + input_args = list(args) + self._client = input_args.pop(0) if input_args else kwargs.pop("client") + self._config = input_args.pop(0) if input_args else kwargs.pop("config") + self._serialize = input_args.pop(0) if input_args else kwargs.pop("serializer") + self._deserialize = ( + input_args.pop(0) if input_args else kwargs.pop("deserializer") + ) + + @distributed_trace_async + async def get(self, dedicated_inference_id: str, **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Get a Dedicated Inference. + + Retrieve an existing Dedicated Inference by ID. Send a GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}``. The status in the response + is one of active, new, provisioning, updating, deleting, or error. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. 
+ "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. 
Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. 
+ } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_request( + dedicated_inference_id=dedicated_inference_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + 
response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + async def patch( + self, + dedicated_inference_id: str, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Update a Dedicated Inference. + + Update an existing Dedicated Inference. Send a PATCH request to + ``/v2/dedicated-inferences/{dedicated_inference_id}`` with updated ``spec`` and/or + ``access_tokens``. Status will move to updating and return to active when done. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "access_tokens": { + "hugging_face_token": "str" # Optional. Hugging Face token required + for gated models. + }, + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". 
+ } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. 
+ "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. 
+ } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @overload + async def patch( + self, + dedicated_inference_id: str, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Update a Dedicated Inference. + + Update an existing Dedicated Inference. Send a PATCH request to + ``/v2/dedicated-inferences/{dedicated_inference_id}`` with updated ``spec`` and/or + ``access_tokens``. Status will move to updating and return to active when done. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. 
code-block:: python + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. 
DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. 
+ "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @distributed_trace_async + async def patch( + self, dedicated_inference_id: str, body: Union[JSON, IO[bytes]], **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Update a Dedicated Inference. + + Update an existing Dedicated Inference. Send a PATCH request to + ``/v2/dedicated-inferences/{dedicated_inference_id}`` with updated ``spec`` and/or + ``access_tokens``. Status will move to updating and return to active when done. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "access_tokens": { + "hugging_face_token": "str" # Optional. Hugging Face token required + for gated models. + }, + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. 
Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). 
+ "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. 
Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_dedicated_inferences_patch_request( + dedicated_inference_id=dedicated_inference_id, + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await 
self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 202: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def delete( + self, dedicated_inference_id: str, **kwargs: Any + ) -> Optional[JSON]: + # pylint: disable=line-too-long + """Delete a Dedicated Inference. + + Delete an existing Dedicated Inference. Send a DELETE request to + ``/v2/dedicated-inferences/{dedicated_inference_id}``. The response 202 Accepted + indicates the request was accepted for processing. 
+ + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :return: JSON object or None + :rtype: JSON or None + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[Optional[JSON]] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_delete_request( + dedicated_inference_id=dedicated_inference_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + 
map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + deserialized = None + response_headers = {} + if response.status_code == 202: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, deserialized, response_headers) # type: ignore + + return deserialized # type: ignore + + @distributed_trace_async + async def list( + self, + *, + per_page: int = 20, + page: int = 1, + region: Optional[str] = None, + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """List Dedicated Inferences. + + List all Dedicated Inference instances for your team. Send a GET request to + ``/v2/dedicated-inferences``. You may filter by region and use page and per_page + for pagination. + + :keyword per_page: Number of items returned per page. Default value is 20. + :paramtype per_page: int + :keyword page: Which 'page' of paginated results to return. Default value is 1. + :paramtype page: int + :keyword region: Filter by region. Dedicated Inference is only available in nyc2, tor1, and + atl1. Known values are: "nyc2", "tor1", and "atl1". Default value is None. 
+ :paramtype region: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "dedicated_inferences": [ + { + "created_at": "2020-02-20 00:00:00", # Optional. When the + Dedicated Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private + VPC FQDN of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public + FQDN of the Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated + Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. + Pending deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether + to expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": + "str", # DigitalOcean GPU slug. Required. + "scale": 0, # Number + of accelerator instances. Required. + "type": "str", # + Accelerator type (e.g. prefill_decode). Required. + "status": "str" # + Optional. Current state of the Accelerator. Known + values are: "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used + to identify an existing deployment when updating; empty means + create new. + "model_provider": "str", # Optional. + Model provider. "hugging_face" + "model_slug": "str", # Optional. + Model identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated + Inference. Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. + Pending deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. 
+ "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the + Dedicated Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose + a public LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": + "str", # DigitalOcean GPU slug. Required. + "scale": 0, # Number + of accelerator instances. Required. + "type": "str", # + Accelerator type (e.g. prefill_decode). Required. + "status": "str" # + Optional. Current state of the Accelerator. Known + values are: "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used + to identify an existing deployment when updating; empty means + create new. + "model_provider": "str", # Optional. + Model provider. "hugging_face" + "model_slug": "str", # Optional. + Model identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. + Must be unique within the team. Required. + "region": "str", # DigitalOcean region where the + Dedicated Inference is hosted. Required. Known values are: "atl1", + "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the + Dedicated Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated + Inference. + } + ], + "links": { + "pages": { + "str": "str" # Optional. Pagination links (first, prev, + next, last). + } + }, + "meta": { + "total": 0 # Total number of results. Required. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_request( + per_page=per_page, + page=page, + region=region, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + async def create( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + 
"""Create a Dedicated Inference. + + Create a new Dedicated Inference for your team. Send a POST request to + ``/v2/dedicated-inferences`` with a ``spec`` object (version, name, region, vpc, + enable_public_endpoint, model_deployments) and optional ``access_tokens`` (e.g. + hugging_face_token for gated models). The response code 202 Accepted indicates + the request was accepted for processing; it does not indicate success or failure. + The token value is returned only on create; store it securely. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. 
Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + }, + "access_tokens": { + "str": "str" # Optional. Key-value pairs for provider tokens (e.g. + Hugging Face). + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". 
+ "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + }, + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. 
Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + """ + + @overload + async def create( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference. + + Create a new Dedicated Inference for your team. Send a POST request to + ``/v2/dedicated-inferences`` with a ``spec`` object (version, name, region, vpc, + enable_public_endpoint, model_deployments) and optional ``access_tokens`` (e.g. + hugging_face_token for gated models). The response code 202 Accepted indicates + the request was accepted for processing; it does not indicate success or failure. + The token value is returned only on create; store it securely. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. 
+ "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. 
"hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + }, + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + """ + + @distributed_trace_async + async def create(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference. + + Create a new Dedicated Inference for your team. Send a POST request to + ``/v2/dedicated-inferences`` with a ``spec`` object (version, name, region, vpc, + enable_public_endpoint, model_deployments) and optional ``access_tokens`` (e.g. + hugging_face_token for gated models). The response code 202 Accepted indicates + the request was accepted for processing; it does not indicate success or failure. + The token value is returned only on create; store it securely. + + :param body: Is either a JSON type or a IO[bytes] type. Required. 
+ :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + }, + "access_tokens": { + "str": "str" # Optional. Key-value pairs for provider tokens (e.g. + Hugging Face). + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. 
+ }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. 
+ Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + }, + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_dedicated_inferences_create_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if 
cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def list_accelerators( + self, + dedicated_inference_id: str, + *, + per_page: int = 20, + page: int = 1, + slug: Optional[str] = None, + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """List Dedicated Inference Accelerators. + + List all accelerators (GPUs) in use by a Dedicated Inference instance. Send a + GET request to ``/v2/dedicated-inferences/{dedicated_inference_id}/accelerators``. + Optionally filter by slug and use page/per_page for pagination. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :keyword per_page: Number of items returned per page. Default value is 20. + :paramtype per_page: int + :keyword page: Which 'page' of paginated results to return. Default value is 1. + :paramtype page: int + :keyword slug: Filter accelerators by GPU slug. Default value is None. + :paramtype slug: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "meta": { + "total": 0 # Optional. Number of objects returned by the request. + }, + "accelerators": [ + { + "created_at": "2020-02-20 00:00:00", # Optional. + "id": "str", # Optional. Unique ID of the accelerator. + "name": "str", # Optional. Name of the accelerator. + "role": "str", # Optional. Role of the accelerator (e.g. + prefill_decode). + "slug": "str", # Optional. DigitalOcean GPU slug. + "status": "str" # Optional. Status of the accelerator. + } + ], + "links": { + "pages": {} + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. 
For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_accelerators_request( + dedicated_inference_id=dedicated_inference_id, + per_page=per_page, + page=page, + slug=slug, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", 
response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def get_accelerator( + self, dedicated_inference_id: str, accelerator_id: str, **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Get a Dedicated Inference Accelerator. + + Retrieve a single accelerator by ID for a Dedicated Inference instance. Send a + GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}/accelerators/{accelerator_id}``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param accelerator_id: A unique identifier for a Dedicated Inference accelerator. Required. + :type accelerator_id: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "created_at": "2020-02-20 00:00:00", # Optional. + "id": "str", # Optional. Unique ID of the accelerator. + "name": "str", # Optional. Name of the accelerator. + "role": "str", # Optional. Role of the accelerator (e.g. prefill_decode). + "slug": "str", # Optional. 
DigitalOcean GPU slug. + "status": "str" # Optional. Status of the accelerator. + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_accelerator_request( + dedicated_inference_id=dedicated_inference_id, + accelerator_id=accelerator_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + 
response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def get_ca(self, dedicated_inference_id: str, **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Get Dedicated Inference CA Certificate. + + Get the CA certificate for a Dedicated Inference instance (base64-encoded). + Required for private endpoint connectivity. Send a GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}/ca``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "cert": "str" # Base64-encoded CA certificate. Required. + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. 
For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_ca_request( + dedicated_inference_id=dedicated_inference_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = 
self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def list_tokens( + self, + dedicated_inference_id: str, + *, + per_page: int = 20, + page: int = 1, + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """List Dedicated Inference Tokens. + + List all access tokens for a Dedicated Inference instance. Token values are + not returned; only id, name, and created_at. Send a GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :keyword per_page: Number of items returned per page. Default value is 20. + :paramtype per_page: int + :keyword page: Which 'page' of paginated results to return. Default value is 1. + :paramtype page: int + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "meta": { + "total": 0 # Optional. Number of objects returned by the request. + }, + "links": { + "pages": {} + }, + "tokens": [ + { + "created_at": "2020-02-20 00:00:00", # Optional. + "id": "str", # Optional. 
Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once + on create. Store securely. + } + ] + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_tokens_request( + dedicated_inference_id=dedicated_inference_id, + per_page=per_page, + page=page, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise 
HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + async def create_tokens( + self, + dedicated_inference_id: str, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference Token. + + Create a new access token for a Dedicated Inference instance. Send a POST + request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens`` with a + ``name``. The token value is returned only once in the response; store it securely. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". 
+ :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "name": "str" # Name for the new token. Required. + } + + # response body for status code(s): 202 + response == { + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @overload + async def create_tokens( + self, + dedicated_inference_id: str, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference Token. + + Create a new access token for a Dedicated Inference instance. Send a POST + request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens`` with a + ``name``. The token value is returned only once in the response; store it securely. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. 
+ :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 202 + response == { + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @distributed_trace_async + async def create_tokens( + self, dedicated_inference_id: str, body: Union[JSON, IO[bytes]], **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference Token. + + Create a new access token for a Dedicated Inference instance. Send a POST + request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens`` with a + ``name``. The token value is returned only once in the response; store it securely. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Is either a JSON type or a IO[bytes] type. Required. 
+ :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "name": "str" # Name for the new token. Required. + } + + # response body for status code(s): 202 + response == { + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. 
+ } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_dedicated_inferences_create_tokens_request( + dedicated_inference_id=dedicated_inference_id, + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 202: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + 
if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def delete_tokens( + self, dedicated_inference_id: str, token_id: str, **kwargs: Any + ) -> Optional[JSON]: + # pylint: disable=line-too-long + """Revoke a Dedicated Inference Token. + + Revoke (delete) an access token for a Dedicated Inference instance. Send a + DELETE request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens/{token_id}``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param token_id: A unique identifier for a Dedicated Inference access token. Required. + :type token_id: str + :return: JSON object or None + :rtype: JSON or None + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. 
Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[Optional[JSON]] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_delete_tokens_request( + dedicated_inference_id=dedicated_inference_id, + token_id=token_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [204, 404]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + deserialized = None + response_headers = {} + if response.status_code == 204: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + 
response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, deserialized, response_headers) # type: ignore + + return deserialized # type: ignore + + @distributed_trace_async + async def list_sizes(self, **kwargs: Any) -> JSON: + """List Dedicated Inference Sizes. + + Get available Dedicated Inference sizes and pricing for supported GPUs. Send a + GET request to ``/v2/dedicated-inferences/sizes``. + + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "enabled_regions": [ + "str" # Optional. Regions where Dedicated Inference is available. + ], + "sizes": [ + { + "currency": "str", # Optional. + "gpu_slug": "str", # Optional. + "price_per_hour": "str", # Optional. + "region": "str" # Optional. 
+ } + ] + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_sizes_request( + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace_async + async def get_gpu_model_config(self, **kwargs: Any) -> JSON: + """Get Dedicated Inference GPU Model Config. 
+ + Get supported GPU and model configurations for Dedicated Inference. Use this to + discover supported GPU slugs and model slugs (e.g. Hugging Face). Send a GET + request to ``/v2/dedicated-inferences/gpu-model-config``. + + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "gpu_model_configs": [ + { + "gpu_slugs": [ + "str" # Optional. + ], + "is_gated_model": bool, # Optional. Whether the model + requires gated access (e.g. Hugging Face token). + "model_name": "str", # Optional. + "model_slug": "str" # Optional. + } + ] + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_gpu_model_config_request( + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + await self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + await response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + 
response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + class DomainsOperations: """ .. warning:: diff --git a/src/pydo/operations/__init__.py b/src/pydo/operations/__init__.py index 4a74b7ce..4325fa0f 100644 --- a/src/pydo/operations/__init__.py +++ b/src/pydo/operations/__init__.py @@ -17,6 +17,7 @@ from ._operations import InvoicesOperations from ._operations import BillingInsightsOperations from ._operations import DatabasesOperations +from ._operations import DedicatedInferencesOperations from ._operations import DomainsOperations from ._operations import DropletsOperations from ._operations import DropletActionsOperations @@ -70,6 +71,7 @@ "InvoicesOperations", "BillingInsightsOperations", "DatabasesOperations", + "DedicatedInferencesOperations", "DomainsOperations", "DropletsOperations", "DropletActionsOperations", diff --git a/src/pydo/operations/_operations.py b/src/pydo/operations/_operations.py index f06ed50c..757cbda6 100644 --- a/src/pydo/operations/_operations.py +++ b/src/pydo/operations/_operations.py @@ -3581,6 +3581,342 @@ def build_databases_delete_opensearch_index_request( # pylint: disable=name-too return HttpRequest(method="DELETE", url=_url, headers=_headers, **kwargs) +def build_dedicated_inferences_get_request( + dedicated_inference_id: str, **kwargs: Any +) -> HttpRequest: + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/{dedicated_inference_id}" + path_format_arguments = { + 
"dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="GET", url=_url, headers=_headers, **kwargs) + + +def build_dedicated_inferences_patch_request( + dedicated_inference_id: str, **kwargs: Any +) -> HttpRequest: + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/{dedicated_inference_id}" + path_format_arguments = { + "dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct headers + if content_type is not None: + _headers["Content-Type"] = _SERIALIZER.header( + "content_type", content_type, "str" + ) + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="PATCH", url=_url, headers=_headers, **kwargs) + + +def build_dedicated_inferences_delete_request( # pylint: disable=name-too-long + dedicated_inference_id: str, **kwargs: Any +) -> HttpRequest: + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/{dedicated_inference_id}" + path_format_arguments = { + "dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="DELETE", url=_url, headers=_headers, **kwargs) + + 
def build_dedicated_inferences_list_request(
    *, per_page: int = 20, page: int = 1, region: Optional[str] = None, **kwargs: Any
) -> HttpRequest:
    """Build the GET request that lists Dedicated Inferences.

    :keyword per_page: Page size; serialized with bounds 1-200. Default 20.
    :keyword page: 1-based page number; serialized with minimum 1. Default 1.
    :keyword region: Optional region slug filter.
    :return: An ``HttpRequest`` targeting ``/v2/dedicated-inferences``.
    """
    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})

    accept = _headers.pop("Accept", "application/json")

    # Construct URL
    _url = "/v2/dedicated-inferences"

    # Construct parameters (bounds are enforced by the serializer).
    if per_page is not None:
        _params["per_page"] = _SERIALIZER.query(
            "per_page", per_page, "int", maximum=200, minimum=1
        )
    if page is not None:
        _params["page"] = _SERIALIZER.query("page", page, "int", minimum=1)
    if region is not None:
        _params["region"] = _SERIALIZER.query("region", region, "str")

    # Construct headers
    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")

    return HttpRequest(
        method="GET", url=_url, params=_params, headers=_headers, **kwargs
    )


def build_dedicated_inferences_create_request(  # pylint: disable=name-too-long
    **kwargs: Any,
) -> HttpRequest:
    """Build the POST request that creates a Dedicated Inference.

    NOTE(fix): the ``# pylint: disable=name-too-long`` comment was previously
    attached to the ``) -> HttpRequest:`` line; line-scoped pylint disables only
    apply to the line they sit on, and the message is reported on the ``def``
    line, so it is moved there to match every sibling builder.
    """
    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    content_type: Optional[str] = kwargs.pop(
        "content_type", _headers.pop("Content-Type", None)
    )
    accept = _headers.pop("Accept", "application/json")

    # Construct URL
    _url = "/v2/dedicated-inferences"

    # Construct headers (Content-Type only when a body content type was given).
    if content_type is not None:
        _headers["Content-Type"] = _SERIALIZER.header(
            "content_type", content_type, "str"
        )
    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")

    return HttpRequest(method="POST", url=_url, headers=_headers, **kwargs)


def build_dedicated_inferences_list_accelerators_request(  # pylint: disable=name-too-long
    dedicated_inference_id: str,
    *,
    per_page: int = 20,
    page: int = 1,
    slug: Optional[str] = None,
    **kwargs: Any,
) -> HttpRequest:
    """Build the GET request that lists a Dedicated Inference's accelerators.

    :param dedicated_inference_id: Unique identifier of the Dedicated Inference
        instance; substituted into the URL path.
    :keyword per_page: Page size (1-200). Default 20.
    :keyword page: 1-based page number. Default 1.
    :keyword slug: Optional accelerator slug filter.
    """
    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
    _params = case_insensitive_dict(kwargs.pop("params", {}) or {})

    accept = _headers.pop("Accept", "application/json")

    # Construct URL
    _url = "/v2/dedicated-inferences/{dedicated_inference_id}/accelerators"
    path_format_arguments = {
        "dedicated_inference_id": _SERIALIZER.url(
            "dedicated_inference_id", dedicated_inference_id, "str"
        ),
    }

    _url: str = _url.format(**path_format_arguments)  # type: ignore

    # Construct parameters
    if per_page is not None:
        _params["per_page"] = _SERIALIZER.query(
            "per_page", per_page, "int", maximum=200, minimum=1
        )
    if page is not None:
        _params["page"] = _SERIALIZER.query("page", page, "int", minimum=1)
    if slug is not None:
        _params["slug"] = _SERIALIZER.query("slug", slug, "str")

    # Construct headers
    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")

    return HttpRequest(
        method="GET", url=_url, params=_params, headers=_headers, **kwargs
    )


def build_dedicated_inferences_get_accelerator_request(  # pylint: disable=name-too-long
    dedicated_inference_id: str, accelerator_id: str, **kwargs: Any
) -> HttpRequest:
    """Build the GET request for one accelerator of a Dedicated Inference.

    :param dedicated_inference_id: Unique identifier of the Dedicated Inference
        instance; substituted into the URL path.
    :param accelerator_id: Unique identifier of the accelerator; substituted
        into the URL path.
    """
    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    accept = _headers.pop("Accept", "application/json")

    # Construct URL
    _url = "/v2/dedicated-inferences/{dedicated_inference_id}/accelerators/{accelerator_id}"
    path_format_arguments = {
        "dedicated_inference_id": _SERIALIZER.url(
            "dedicated_inference_id", dedicated_inference_id, "str"
        ),
        "accelerator_id": _SERIALIZER.url("accelerator_id", accelerator_id, "str"),
    }

    _url: str = _url.format(**path_format_arguments)  # type: ignore

    # Construct headers
    _headers["Accept"] = _SERIALIZER.header("accept", accept, "str")

    return HttpRequest(method="GET", url=_url, headers=_headers, **kwargs)


def build_dedicated_inferences_get_ca_request(  # pylint: disable=name-too-long
    dedicated_inference_id: str, **kwargs: Any
) -> HttpRequest:
    _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})

    accept = _headers.pop("Accept", "application/json")
+ + # Construct URL + _url = "/v2/dedicated-inferences/{dedicated_inference_id}/ca" + path_format_arguments = { + "dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="GET", url=_url, headers=_headers, **kwargs) + + +def build_dedicated_inferences_list_tokens_request( # pylint: disable=name-too-long + dedicated_inference_id: str, *, per_page: int = 20, page: int = 1, **kwargs: Any +) -> HttpRequest: + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = case_insensitive_dict(kwargs.pop("params", {}) or {}) + + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/{dedicated_inference_id}/tokens" + path_format_arguments = { + "dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct parameters + if per_page is not None: + _params["per_page"] = _SERIALIZER.query( + "per_page", per_page, "int", maximum=200, minimum=1 + ) + if page is not None: + _params["page"] = _SERIALIZER.query("page", page, "int", minimum=1) + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest( + method="GET", url=_url, params=_params, headers=_headers, **kwargs + ) + + +def build_dedicated_inferences_create_tokens_request( # pylint: disable=name-too-long + dedicated_inference_id: str, **kwargs: Any +) -> HttpRequest: + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = 
"/v2/dedicated-inferences/{dedicated_inference_id}/tokens" + path_format_arguments = { + "dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct headers + if content_type is not None: + _headers["Content-Type"] = _SERIALIZER.header( + "content_type", content_type, "str" + ) + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="POST", url=_url, headers=_headers, **kwargs) + + +def build_dedicated_inferences_delete_tokens_request( # pylint: disable=name-too-long + dedicated_inference_id: str, token_id: str, **kwargs: Any +) -> HttpRequest: + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/{dedicated_inference_id}/tokens/{token_id}" + path_format_arguments = { + "dedicated_inference_id": _SERIALIZER.url( + "dedicated_inference_id", dedicated_inference_id, "str" + ), + "token_id": _SERIALIZER.url("token_id", token_id, "str"), + } + + _url: str = _url.format(**path_format_arguments) # type: ignore + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="DELETE", url=_url, headers=_headers, **kwargs) + + +def build_dedicated_inferences_list_sizes_request( + **kwargs: Any, +) -> HttpRequest: # pylint: disable=name-too-long + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/sizes" + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="GET", url=_url, headers=_headers, **kwargs) + + +def build_dedicated_inferences_get_gpu_model_config_request( # pylint: disable=name-too-long + **kwargs: Any, +) -> HttpRequest: + 
_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + + accept = _headers.pop("Accept", "application/json") + + # Construct URL + _url = "/v2/dedicated-inferences/gpu-model-config" + + # Construct headers + _headers["Accept"] = _SERIALIZER.header("accept", accept, "str") + + return HttpRequest(method="GET", url=_url, headers=_headers, **kwargs) + + def build_domains_list_request( *, per_page: int = 20, page: int = 1, **kwargs: Any ) -> HttpRequest: @@ -135007,6 +135343,2838 @@ def delete_opensearch_index( return deserialized # type: ignore +class DedicatedInferencesOperations: + """ + .. warning:: + **DO NOT** instantiate this class directly. + + Instead, you should access the following operations through + :class:`~pydo.GeneratedClient`'s + :attr:`dedicated_inferences` attribute. + """ + + def __init__(self, *args, **kwargs): + input_args = list(args) + self._client = input_args.pop(0) if input_args else kwargs.pop("client") + self._config = input_args.pop(0) if input_args else kwargs.pop("config") + self._serialize = input_args.pop(0) if input_args else kwargs.pop("serializer") + self._deserialize = ( + input_args.pop(0) if input_args else kwargs.pop("deserializer") + ) + + @distributed_trace + def get(self, dedicated_inference_id: str, **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Get a Dedicated Inference. + + Retrieve an existing Dedicated Inference by ID. Send a GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}``. The status in the response + is one of active, new, provisioning, updating, deleting, or error. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. 
When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. 
+ "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. 
Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_request( + dedicated_inference_id=dedicated_inference_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", 
response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + def patch( + self, + dedicated_inference_id: str, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """Update a Dedicated Inference. + + Update an existing Dedicated Inference. Send a PATCH request to + ``/v2/dedicated-inferences/{dedicated_inference_id}`` with updated ``spec`` and/or + ``access_tokens``. Status will move to updating and return to active when done. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "access_tokens": { + "hugging_face_token": "str" # Optional. Hugging Face token required + for gated models. + }, + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). 
Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". 
+ } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". 
+ "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @overload + def patch( + self, + dedicated_inference_id: str, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """Update a Dedicated Inference. + + Update an existing Dedicated Inference. Send a PATCH request to + ``/v2/dedicated-inferences/{dedicated_inference_id}`` with updated ``spec`` and/or + ``access_tokens``. Status will move to updating and return to active when done. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. 
code-block:: python + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. 
DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. 
+ "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @distributed_trace + def patch( + self, dedicated_inference_id: str, body: Union[JSON, IO[bytes]], **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Update a Dedicated Inference. + + Update an existing Dedicated Inference. Send a PATCH request to + ``/v2/dedicated-inferences/{dedicated_inference_id}`` with updated ``spec`` and/or + ``access_tokens``. Status will move to updating and return to active when done. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "access_tokens": { + "hugging_face_token": "str" # Optional. Hugging Face token required + for gated models. + }, + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. 
+ "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). 
+ "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. 
Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_dedicated_inferences_patch_request( + dedicated_inference_id=dedicated_inference_id, + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + 
self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 202: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def delete(self, dedicated_inference_id: str, **kwargs: Any) -> Optional[JSON]: + # pylint: disable=line-too-long + """Delete a Dedicated Inference. + + Delete an existing Dedicated Inference. Send a DELETE request to + ``/v2/dedicated-inferences/{dedicated_inference_id}``. The response 202 Accepted + indicates the request was accepted for processing. 
+ + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :return: JSON object or None + :rtype: JSON or None + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[Optional[JSON]] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_delete_request( + dedicated_inference_id=dedicated_inference_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, 
response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + deserialized = None + response_headers = {} + if response.status_code == 202: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, deserialized, response_headers) # type: ignore + + return deserialized # type: ignore + + @distributed_trace + def list( + self, + *, + per_page: int = 20, + page: int = 1, + region: Optional[str] = None, + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """List Dedicated Inferences. + + List all Dedicated Inference instances for your team. Send a GET request to + ``/v2/dedicated-inferences``. You may filter by region and use page and per_page + for pagination. + + :keyword per_page: Number of items returned per page. Default value is 20. + :paramtype per_page: int + :keyword page: Which 'page' of paginated results to return. Default value is 1. + :paramtype page: int + :keyword region: Filter by region. Dedicated Inference is only available in nyc2, tor1, and + atl1. Known values are: "nyc2", "tor1", and "atl1". Default value is None. 
+ :paramtype region: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "dedicated_inferences": [ + { + "created_at": "2020-02-20 00:00:00", # Optional. When the + Dedicated Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private + VPC FQDN of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public + FQDN of the Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated + Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. + Pending deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether + to expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": + "str", # DigitalOcean GPU slug. Required. + "scale": 0, # Number + of accelerator instances. Required. + "type": "str", # + Accelerator type (e.g. prefill_decode). Required. + "status": "str" # + Optional. Current state of the Accelerator. Known + values are: "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used + to identify an existing deployment when updating; empty means + create new. + "model_provider": "str", # Optional. + Model provider. "hugging_face" + "model_slug": "str", # Optional. + Model identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated + Inference. Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. + Pending deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. 
+ "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the + Dedicated Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose + a public LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": + "str", # DigitalOcean GPU slug. Required. + "scale": 0, # Number + of accelerator instances. Required. + "type": "str", # + Accelerator type (e.g. prefill_decode). Required. + "status": "str" # + Optional. Current state of the Accelerator. Known + values are: "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used + to identify an existing deployment when updating; empty means + create new. + "model_provider": "str", # Optional. + Model provider. "hugging_face" + "model_slug": "str", # Optional. + Model identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. + Must be unique within the team. Required. + "region": "str", # DigitalOcean region where the + Dedicated Inference is hosted. Required. Known values are: "atl1", + "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the + Dedicated Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated + Inference. + } + ], + "links": { + "pages": { + "str": "str" # Optional. Pagination links (first, prev, + next, last). + } + }, + "meta": { + "total": 0 # Total number of results. Required. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_request( + per_page=per_page, + page=page, + region=region, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + def create( + self, body: JSON, *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a 
Dedicated Inference. + + Create a new Dedicated Inference for your team. Send a POST request to + ``/v2/dedicated-inferences`` with a ``spec`` object (version, name, region, vpc, + enable_public_endpoint, model_deployments) and optional ``access_tokens`` (e.g. + hugging_face_token for gated models). The response code 202 Accepted indicates + the request was accepted for processing; it does not indicate success or failure. + The token value is returned only on create; store it securely. + + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". 
+ "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + }, + "access_tokens": { + "str": "str" # Optional. Key-value pairs for provider tokens (e.g. + Hugging Face). + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. 
Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + }, + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. 
+ "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + """ + + @overload + def create( + self, body: IO[bytes], *, content_type: str = "application/json", **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference. + + Create a new Dedicated Inference for your team. Send a POST request to + ``/v2/dedicated-inferences`` with a ``spec`` object (version, name, region, vpc, + enable_public_endpoint, model_deployments) and optional ``access_tokens`` (e.g. + hugging_face_token for gated models). The response code 202 Accepted indicates + the request was accepted for processing; it does not indicate success or failure. + The token value is returned only on create; store it securely. + + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. + }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. 
+ "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. 
Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + }, + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + """ + + @distributed_trace + def create(self, body: Union[JSON, IO[bytes]], **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference. + + Create a new Dedicated Inference for your team. Send a POST request to + ``/v2/dedicated-inferences`` with a ``spec`` object (version, name, region, vpc, + enable_public_endpoint, model_deployments) and optional ``access_tokens`` (e.g. + hugging_face_token for gated models). The response code 202 Accepted indicates + the request was accepted for processing; it does not indicate success or failure. + The token value is returned only on create; store it securely. + + :param body: Is either a JSON type or a IO[bytes] type. Required. 
+ :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public LLM + endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of accelerator + instances. Required. + "type": "str", # Accelerator type + (e.g. prefill_decode). Required. + "status": "str" # Optional. Current + state of the Accelerator. Known values are: "new", + "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to identify an + existing deployment when updating; empty means create new. + "model_provider": "str", # Optional. Model provider. + "hugging_face" + "model_slug": "str", # Optional. Model identifier + (e.g. Hugging Face slug). + "workload_config": {} # Optional. Workload-specific + configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be unique + within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated Inference + is hosted. Required. Known values are: "atl1", "nyc2", and "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated Inference. + Required. + } + }, + "access_tokens": { + "str": "str" # Optional. Key-value pairs for provider tokens (e.g. + Hugging Face). + } + } + + # response body for status code(s): 202 + response == { + "dedicated_inference": { + "created_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was created. + "endpoints": { + "private_endpoint_fqdn": "str", # Optional. Private VPC FQDN + of the Dedicated Inference instance. + "public_endpoint_fqdn": "str" # Optional. Public FQDN of the + Dedicated Inference instance. 
+ }, + "id": "str", # Optional. Unique ID of the Dedicated Inference. + "pending_deployment_spec": { + "created_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "enable_public_endpoint": bool, # Optional. Whether to + expose a public LLM endpoint. + "id": "str", # Optional. Deployment UUID. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. + Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Optional. Name of the Dedicated Inference. + Must be unique within the team. + "status": "str", # Optional. Known values are: + "provisioning" and "updating". + "updated_at": "2020-02-20 00:00:00", # Optional. Pending + deployment when status is provisioning or updating. + "version": 0, # Optional. Spec version. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "region": "str", # Optional. DigitalOcean region where the Dedicated + Inference is hosted. + "spec": { + "enable_public_endpoint": bool, # Whether to expose a public + LLM endpoint. Required. + "model_deployments": [ + { + "accelerators": [ + { + "accelerator_slug": "str", # + DigitalOcean GPU slug. Required. + "scale": 0, # Number of + accelerator instances. Required. + "type": "str", # Accelerator + type (e.g. prefill_decode). Required. + "status": "str" # Optional. 
+ Current state of the Accelerator. Known values are: + "new", "provisioning", and "active". + } + ], + "model_id": "str", # Optional. Used to + identify an existing deployment when updating; empty means create + new. + "model_provider": "str", # Optional. Model + provider. "hugging_face" + "model_slug": "str", # Optional. Model + identifier (e.g. Hugging Face slug). + "workload_config": {} # Optional. + Workload-specific configuration (e.g. ISL/OSL in future). + } + ], + "name": "str", # Name of the Dedicated Inference. Must be + unique within the team. Required. + "region": "str", # DigitalOcean region where the Dedicated + Inference is hosted. Required. Known values are: "atl1", "nyc2", and + "tor1". + "version": 0, # Spec version. Required. + "vpc": { + "uuid": "str" # VPC UUID for the Dedicated + Inference. Required. + } + }, + "status": "str", # Optional. Current state of the Dedicated + Inference. Known values are: "active", "new", "provisioning", "updating", + "deleting", and "error". + "updated_at": "2020-02-20 00:00:00", # Optional. When the Dedicated + Inference was last updated. + "vpc_uuid": "str" # Optional. VPC UUID of the Dedicated Inference. + }, + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. 
+ } + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_dedicated_inferences_create_request( + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + 
return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def list_accelerators( + self, + dedicated_inference_id: str, + *, + per_page: int = 20, + page: int = 1, + slug: Optional[str] = None, + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """List Dedicated Inference Accelerators. + + List all accelerators (GPUs) in use by a Dedicated Inference instance. Send a + GET request to ``/v2/dedicated-inferences/{dedicated_inference_id}/accelerators``. + Optionally filter by slug and use page/per_page for pagination. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :keyword per_page: Number of items returned per page. Default value is 20. + :paramtype per_page: int + :keyword page: Which 'page' of paginated results to return. Default value is 1. + :paramtype page: int + :keyword slug: Filter accelerators by GPU slug. Default value is None. + :paramtype slug: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "meta": { + "total": 0 # Optional. Number of objects returned by the request. + }, + "accelerators": [ + { + "created_at": "2020-02-20 00:00:00", # Optional. + "id": "str", # Optional. Unique ID of the accelerator. + "name": "str", # Optional. Name of the accelerator. + "role": "str", # Optional. Role of the accelerator (e.g. + prefill_decode). + "slug": "str", # Optional. DigitalOcean GPU slug. + "status": "str" # Optional. Status of the accelerator. + } + ], + "links": { + "pages": {} + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. 
For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_accelerators_request( + dedicated_inference_id=dedicated_inference_id, + per_page=per_page, + page=page, + slug=slug, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) 
+ response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def get_accelerator( + self, dedicated_inference_id: str, accelerator_id: str, **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Get a Dedicated Inference Accelerator. + + Retrieve a single accelerator by ID for a Dedicated Inference instance. Send a + GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}/accelerators/{accelerator_id}``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param accelerator_id: A unique identifier for a Dedicated Inference accelerator. Required. + :type accelerator_id: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "created_at": "2020-02-20 00:00:00", # Optional. + "id": "str", # Optional. Unique ID of the accelerator. + "name": "str", # Optional. Name of the accelerator. + "role": "str", # Optional. Role of the accelerator (e.g. prefill_decode). + "slug": "str", # Optional. DigitalOcean GPU slug. + "status": "str" # Optional. 
Status of the accelerator. + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_accelerator_request( + dedicated_inference_id=dedicated_inference_id, + accelerator_id=accelerator_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", 
response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def get_ca(self, dedicated_inference_id: str, **kwargs: Any) -> JSON: + # pylint: disable=line-too-long + """Get Dedicated Inference CA Certificate. + + Get the CA certificate for a Dedicated Inference instance (base64-encoded). + Required for private endpoint connectivity. Send a GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}/ca``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "cert": "str" # Base64-encoded CA certificate. Required. + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. 
+ "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_ca_request( + dedicated_inference_id=dedicated_inference_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = 
response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def list_tokens( + self, + dedicated_inference_id: str, + *, + per_page: int = 20, + page: int = 1, + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """List Dedicated Inference Tokens. + + List all access tokens for a Dedicated Inference instance. Token values are + not returned; only id, name, and created_at. Send a GET request to + ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :keyword per_page: Number of items returned per page. Default value is 20. + :paramtype per_page: int + :keyword page: Which 'page' of paginated results to return. Default value is 1. + :paramtype page: int + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "meta": { + "total": 0 # Optional. Number of objects returned by the request. + }, + "links": { + "pages": {} + }, + "tokens": [ + { + "created_at": "2020-02-20 00:00:00", # Optional. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. 
Token value; only returned once + on create. Store securely. + } + ] + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_tokens_request( + dedicated_inference_id=dedicated_inference_id, + per_page=per_page, + page=page, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 200: + 
response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @overload + def create_tokens( + self, + dedicated_inference_id: str, + body: JSON, + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference Token. + + Create a new access token for a Dedicated Inference instance. Send a POST + request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens`` with a + ``name``. The token value is returned only once in the response; store it securely. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: JSON + :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. + Default value is "application/json". + :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. 
code-block:: python + + # JSON input template you can fill out and use as your body input. + body = { + "name": "str" # Name for the new token. Required. + } + + # response body for status code(s): 202 + response == { + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @overload + def create_tokens( + self, + dedicated_inference_id: str, + body: IO[bytes], + *, + content_type: str = "application/json", + **kwargs: Any, + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference Token. + + Create a new access token for a Dedicated Inference instance. Send a POST + request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens`` with a + ``name``. The token value is returned only once in the response; store it securely. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Required. + :type body: IO[bytes] + :keyword content_type: Body Parameter content-type. Content type parameter for binary body. + Default value is "application/json". 
+ :paramtype content_type: str + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 202 + response == { + "token": { + "created_at": "2020-02-20 00:00:00", # Optional. Access token for + authenticating to Dedicated Inference endpoints. + "id": "str", # Optional. Unique ID of the token. + "name": "str", # Optional. Name of the token. + "value": "str" # Optional. Token value; only returned once on + create. Store securely. + } + } + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + + @distributed_trace + def create_tokens( + self, dedicated_inference_id: str, body: Union[JSON, IO[bytes]], **kwargs: Any + ) -> JSON: + # pylint: disable=line-too-long + """Create a Dedicated Inference Token. + + Create a new access token for a Dedicated Inference instance. Send a POST + request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens`` with a + ``name``. The token value is returned only once in the response; store it securely. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param body: Is either a JSON type or a IO[bytes] type. Required. + :type body: JSON or IO[bytes] + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. 
code-block:: python
+
+                # JSON input template you can fill out and use as your body input.
+                body = {
+                    "name": "str"  # Name for the new token. Required.
+                }
+
+                # response body for status code(s): 202
+                response == {
+                    "token": {
+                        "created_at": "2020-02-20 00:00:00",  # Optional. When the token
+                          was created.
+                        "id": "str",  # Optional. Unique ID of the token.
+                        "name": "str",  # Optional. Name of the token.
+                        "value": "str"  # Optional. Token value; only returned once on
+                          create. Store securely.
+                    }
+                }
+                # response body for status code(s): 404
+                response == {
+                    "id": "str",  # A short identifier corresponding to the HTTP status code
+                      returned. For example, the ID for a response returning a 404 status code would
+                      be "not_found.". Required.
+                    "message": "str",  # A message providing additional information about the
+                      error, including details to help resolve it when possible. Required.
+                    "request_id": "str"  # Optional. Optionally, some endpoints may include a
+                      request ID that should be provided when reporting bugs or opening support
+                      tickets to help identify the issue.
+ } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) + _params = kwargs.pop("params", {}) or {} + + content_type: Optional[str] = kwargs.pop( + "content_type", _headers.pop("Content-Type", None) + ) + cls: ClsType[JSON] = kwargs.pop("cls", None) + + content_type = content_type or "application/json" + _json = None + _content = None + if isinstance(body, (IOBase, bytes)): + _content = body + else: + _json = body + + _request = build_dedicated_inferences_create_tokens_request( + dedicated_inference_id=dedicated_inference_id, + content_type=content_type, + json=_json, + content=_content, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [202, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + if response.status_code == 202: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if 
response.content: + deserialized = response.json() + else: + deserialized = None + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def delete_tokens( + self, dedicated_inference_id: str, token_id: str, **kwargs: Any + ) -> Optional[JSON]: + # pylint: disable=line-too-long + """Revoke a Dedicated Inference Token. + + Revoke (delete) an access token for a Dedicated Inference instance. Send a + DELETE request to ``/v2/dedicated-inferences/{dedicated_inference_id}/tokens/{token_id}``. + + :param dedicated_inference_id: A unique identifier for a Dedicated Inference instance. + Required. + :type dedicated_inference_id: str + :param token_id: A unique identifier for a Dedicated Inference access token. Required. + :type token_id: str + :return: JSON object or None + :rtype: JSON or None + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 404 + response == { + "id": "str", # A short identifier corresponding to the HTTP status code + returned. For example, the ID for a response returning a 404 status code would + be "not_found.". Required. + "message": "str", # A message providing additional information about the + error, including details to help resolve it when possible. Required. + "request_id": "str" # Optional. 
Optionally, some endpoints may include a + request ID that should be provided when reporting bugs or opening support + tickets to help identify the issue. + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[Optional[JSON]] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_delete_tokens_request( + dedicated_inference_id=dedicated_inference_id, + token_id=token_id, + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [204, 404]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + deserialized = None + response_headers = {} + if response.status_code == 204: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.status_code == 404: + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + 
response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, deserialized, response_headers) # type: ignore + + return deserialized # type: ignore + + @distributed_trace + def list_sizes(self, **kwargs: Any) -> JSON: + """List Dedicated Inference Sizes. + + Get available Dedicated Inference sizes and pricing for supported GPUs. Send a + GET request to ``/v2/dedicated-inferences/sizes``. + + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "enabled_regions": [ + "str" # Optional. Regions where Dedicated Inference is available. + ], + "sizes": [ + { + "currency": "str", # Optional. + "gpu_slug": "str", # Optional. + "price_per_hour": "str", # Optional. + "region": "str" # Optional. 
+ } + ] + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_list_sizes_request( + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + @distributed_trace + def get_gpu_model_config(self, **kwargs: Any) -> JSON: + """Get Dedicated Inference GPU Model Config. + + Get supported GPU and model configurations for Dedicated Inference. 
Use this to + discover supported GPU slugs and model slugs (e.g. Hugging Face). Send a GET + request to ``/v2/dedicated-inferences/gpu-model-config``. + + :return: JSON object + :rtype: JSON + :raises ~azure.core.exceptions.HttpResponseError: + + Example: + .. code-block:: python + + # response body for status code(s): 200 + response == { + "gpu_model_configs": [ + { + "gpu_slugs": [ + "str" # Optional. + ], + "is_gated_model": bool, # Optional. Whether the model + requires gated access (e.g. Hugging Face token). + "model_name": "str", # Optional. + "model_slug": "str" # Optional. + } + ] + } + """ + error_map: MutableMapping[int, Type[HttpResponseError]] = { + 404: ResourceNotFoundError, + 409: ResourceExistsError, + 304: ResourceNotModifiedError, + 401: cast( + Type[HttpResponseError], + lambda response: ClientAuthenticationError(response=response), + ), + 429: HttpResponseError, + 500: HttpResponseError, + } + error_map.update(kwargs.pop("error_map", {}) or {}) + + _headers = kwargs.pop("headers", {}) or {} + _params = kwargs.pop("params", {}) or {} + + cls: ClsType[JSON] = kwargs.pop("cls", None) + + _request = build_dedicated_inferences_get_gpu_model_config_request( + headers=_headers, + params=_params, + ) + _request.url = self._client.format_url(_request.url) + + _stream = False + pipeline_response: PipelineResponse = ( + self._client._pipeline.run( # pylint: disable=protected-access + _request, stream=_stream, **kwargs + ) + ) + + response = pipeline_response.http_response + + if response.status_code not in [200]: + if _stream: + response.read() # Load the body in memory and close the socket + map_error(status_code=response.status_code, response=response, error_map=error_map) # type: ignore + raise HttpResponseError(response=response) + + response_headers = {} + response_headers["ratelimit-limit"] = self._deserialize( + "int", response.headers.get("ratelimit-limit") + ) + response_headers["ratelimit-remaining"] = self._deserialize( + "int", 
response.headers.get("ratelimit-remaining") + ) + response_headers["ratelimit-reset"] = self._deserialize( + "int", response.headers.get("ratelimit-reset") + ) + + if response.content: + deserialized = response.json() + else: + deserialized = None + + if cls: + return cls(pipeline_response, cast(JSON, deserialized), response_headers) # type: ignore + + return cast(JSON, deserialized) # type: ignore + + class DomainsOperations: """ .. warning::