From 80722f2dec7c392997ae0796490078a0a0c5bc56 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Tue, 9 Jun 2026 15:20:13 -0700
Subject: [PATCH 01/10] feat(models): Add prompt entity

Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 openapi/ga/individual/platform.openapi.yaml   | 560 ++++++++++++++++++
 openapi/ga/openapi.yaml                       | 560 ++++++++++++++++++
 openapi/openapi.yaml                          | 560 ++++++++++++++++++
 sdk/stainless.yaml                            |  18 +
 .../src/nmp/core/models/api/dependencies.py   |   8 +
 .../core/models/api/service/prompt_service.py | 175 ++++++
 .../src/nmp/core/models/api/v2/prompts.py     | 186 ++++++
 .../models/src/nmp/core/models/entities.py    |  53 ++
 .../models/src/nmp/core/models/schemas.py     | 212 ++++++-
 .../models/src/nmp/core/models/service.py     |   7 +-
 .../models/tests/unit/api/test_prompts_api.py | 244 ++++++++
 .../tests/unit/test_prompt_service_unit.py    | 231 ++++++++
 12 files changed, 2812 insertions(+), 2 deletions(-)
 create mode 100644 services/core/models/src/nmp/core/models/api/service/prompt_service.py
 create mode 100644 services/core/models/src/nmp/core/models/api/v2/prompts.py
 create mode 100644 services/core/models/tests/unit/api/test_prompts_api.py
 create mode 100644 services/core/models/tests/unit/test_prompt_service_unit.py

diff --git a/openapi/ga/individual/platform.openapi.yaml b/openapi/ga/individual/platform.openapi.yaml
index b0ebfd2e13..96e7ade804 100644
--- a/openapi/ga/individual/platform.openapi.yaml
+++ b/openapi/ga/individual/platform.openapi.yaml
@@ -6579,6 +6579,202 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts:
+    get:
+      tags:
+      - Prompts
+      summary: List Prompts By Workspace
+      description: List prompts for a specific workspace.
+      operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: page
+        in: query
+        required: false
+        schema:
+          type: integer
+          description: Page number.
+          default: 1
+          title: Page
+        description: Page number.
+      - name: page_size
+        in: query
+        required: false
+        schema:
+          type: integer
+          description: Page size.
+          default: 100
+          title: Page Size
+        description: Page size.
+      - name: sort
+        in: query
+        required: false
+        schema:
+          allOf:
+          - $ref: '#/components/schemas/PromptSort'
+          description: The field to sort by. To sort in decreasing order, use `-`
+            in front of the field name.
+          default: created_at
+        description: The field to sort by. To sort in decreasing order, use `-` in
+          front of the field name.
+      - in: query
+        name: filter
+        style: deepObject
+        required: false
+        explode: true
+        schema:
+          $ref: '#/components/schemas/PromptFilter'
+        description: Filter prompts by workspace, project, name, description, created_at,
+          and updated_at.
+      responses:
+        '200':
+          description: Return prompts for a workspace
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PromptsPage'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    post:
+      tags:
+      - Prompts
+      summary: Create Prompt
+      description: Create a new prompt.
+      operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreatePromptRequest'
+      responses:
+        '201':
+          description: Create a new prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts/{name}:
+    get:
+      tags:
+      - Prompts
+      summary: Get Prompt
+      description: Get a prompt by workspace and name.
+      operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '200':
+          description: Return prompt details
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    put:
+      tags:
+      - Prompts
+      summary: Update Prompt
+      description: Update an existing prompt (full replacement of mutable fields).
+      operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdatePromptRequest'
+      responses:
+        '200':
+          description: Update an existing prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    delete:
+      tags:
+      - Prompts
+      summary: Delete Prompt
+      description: Delete a prompt by workspace and name.
+      operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '204':
+          description: Delete a prompt
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
   /apis/models/v2/workspaces/{workspace}/providers:
     get:
       tags:
@@ -8204,6 +8400,24 @@ components:
       - role
       title: ChatCompletionSystemMessageParam
       description: System message parameter for chat completion.
+    ChatCompletionTool:
+      properties:
+        type:
+          type: string
+          const: function
+          title: Type
+          description: The type of the tool. Currently only 'function' is supported.
+          default: function
+        function:
+          allOf:
+          - $ref: '#/components/schemas/FunctionDefinition'
+          description: The function definition for this tool.
+      type: object
+      required:
+      - function
+      title: ChatCompletionTool
+      description: An OpenAI-compatible tool definition (currently always a function
+        tool).
     ChatCompletionToolMessageParam:
       properties:
         content:
@@ -8910,6 +9124,65 @@ components:
       - source
       title: CreatePlatformJobRequest
       description: Request model for creating a new platform job.
+    CreatePromptRequest:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z),
+            digits (0-9), underscores, hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      required:
+      - name
+      title: CreatePromptRequest
+      description: Request model for creating a Prompt.
     CreateVirtualModelRequest:
       properties:
         default_model_entity:
@@ -10512,6 +10785,39 @@ components:
       - name
       title: FunctionCall
       description: Function call information.
+    FunctionDefinition:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          title: Name
+          description: The name of the function to be called.
+        description:
+          title: Description
+          description: A description of what the function does, used by the model
+            to decide when and how to call it.
+          type: string
+        parameters:
+          title: Parameters
+          description: The parameters the function accepts, described as a JSON Schema
+            object.
+          additionalProperties: true
+          type: object
+        strict:
+          title: Strict
+          description: Whether to enforce strict schema adherence when generating
+            the function call.
+          type: boolean
+      type: object
+      required:
+      - name
+      title: FunctionDefinition
+      description: 'An OpenAI-compatible function definition for tool calling.
+
+
+        Mirrors the ``function`` object the Inference Gateway forwards to
+
+        OpenAI-compatible backends.'
     GLiNERDetection:
       properties:
         server_endpoint:
@@ -15064,6 +15370,110 @@ components:
       required:
       - data
       title: ProjectsPage
+    Prompt:
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique identifier for the prompt.
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the entity. Name/workspace combo must be unique across
+            all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores,
+            hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        workspace:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Workspace
+          description: 'The workspace of the entity. Allowed characters: letters (a-z,
+            A-Z), digits (0-9), underscores, hyphens, and dots.'
+        project:
+          title: Project
+          description: The URN of the project associated with this entity.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        created_at:
+          type: string
+          format: date-time
+          title: Created At
+          description: The timestamp of prompt creation
+        updated_at:
+          type: string
+          format: date-time
+          title: Updated At
+          description: The timestamp of the last prompt update
+        description:
+          title: Description
+          description: Optional description of the prompt.
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+          description: Ordered list of chat messages that make up the prompt.
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+          description: Names of the Jinja2 template variables the prompt expects.
+        tools:
+          title: Tools
+          description: Optional OpenAI-compatible tool definitions to send with the
+            prompt.
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+          description: 'Controls which (if any) tool is called: ''none'', ''auto'',
+            ''required'', or a named-tool object.'
+        response_format:
+          title: Response Format
+          description: Optional OpenAI-compatible response_format, e.g. a json_schema
+            structured-output spec.
+          additionalProperties: true
+          type: object
+        inference_params:
+          allOf:
+          - $ref: '#/components/schemas/InferenceParams'
+          description: Optional default model and sampling parameters (temperature,
+            top_p, max_tokens, ...).
+        tags:
+          items:
+            type: string
+          type: array
+          title: Tags
+          description: Optional free-form tags for organizing prompts.
+      type: object
+      required:
+      - name
+      - workspace
+      - created_at
+      - updated_at
+      title: Prompt
+      description: 'A reusable, stored chat prompt.
+
+
+        A Prompt captures the messages, declared template variables, optional tool
+
+        definitions, and default inference parameters needed to invoke a model
+
+        through the Inference Gateway. The unique identifier is workspace/name.'
     PromptData:
       properties:
         system_prompt:
@@ -15086,6 +15496,106 @@ components:
       type: object
       title: PromptData
       description: Configuration for prompt engineering.
+    PromptFilter:
+      additionalProperties: false
+      description: Filter for Prompt queries.
+      properties:
+        workspace:
+          description: Filter by workspace.
+          title: Workspace
+          type: string
+        project:
+          description: Filter by project URN.
+          title: Project
+          type: string
+        name:
+          description: Filter by name.
+          title: Name
+          type: string
+        description:
+          description: Filter by description.
+          title: Description
+          type: string
+        created_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by creation date.
+        updated_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by update date.
+      title: PromptFilter
+      type: object
+    PromptMessage:
+      properties:
+        role:
+          allOf:
+          - $ref: '#/components/schemas/PromptMessageRole'
+          description: The role of the message author.
+        content:
+          type: string
+          title: Content
+          description: Templated message content. May contain template variables.
+      type: object
+      required:
+      - role
+      - content
+      title: PromptMessage
+      description: 'A single templated message in a chat prompt.
+
+
+        ``content`` is a Jinja2 template body that may reference the prompt''s
+
+        declared ``input_variables`` (e.g. ``{{ topic }}``).'
+    PromptMessageRole:
+      type: string
+      enum:
+      - system
+      - developer
+      - user
+      - assistant
+      title: PromptMessageRole
+      description: 'Role of a message author in a chat prompt.
+
+
+        Follows the OpenAI chat schema the Inference Gateway speaks
+
+        (``/v1/chat/completions``).'
+    PromptSort:
+      type: string
+      enum:
+      - name
+      - -name
+      - created_at
+      - -created_at
+      - updated_at
+      - -updated_at
+      title: PromptSort
+      description: Sort fields for Prompt queries.
+    PromptsPage:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/Prompt'
+          type: array
+          title: Data
+        pagination:
+          allOf:
+          - $ref: '#/components/schemas/PaginationData'
+          description: Pagination information.
+        sort:
+          title: Sort
+          description: The field on which the results are sorted.
+          type: string
+        filter:
+          title: Filter
+          description: Filtering information.
+          additionalProperties: true
+          type: object
+      type: object
+      required:
+      - data
+      title: PromptsPage
     RailStatus:
       properties:
         status:
@@ -16870,6 +17380,56 @@ components:
 
 
         This endpoint supports partial updates for fields managed by Models Controller.'
+    UpdatePromptRequest:
+      properties:
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      title: UpdatePromptRequest
+      description: 'Request model for replacing a Prompt''s mutable fields (full update).
+
+
+        The prompt name and workspace come from the URL path and cannot be changed.'
     UpdateVirtualModelRequest:
       properties:
         default_model_entity:
diff --git a/openapi/ga/openapi.yaml b/openapi/ga/openapi.yaml
index b0ebfd2e13..96e7ade804 100644
--- a/openapi/ga/openapi.yaml
+++ b/openapi/ga/openapi.yaml
@@ -6579,6 +6579,202 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts:
+    get:
+      tags:
+      - Prompts
+      summary: List Prompts By Workspace
+      description: List prompts for a specific workspace.
+      operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: page
+        in: query
+        required: false
+        schema:
+          type: integer
+          description: Page number.
+          default: 1
+          title: Page
+        description: Page number.
+      - name: page_size
+        in: query
+        required: false
+        schema:
+          type: integer
+          description: Page size.
+          default: 100
+          title: Page Size
+        description: Page size.
+      - name: sort
+        in: query
+        required: false
+        schema:
+          allOf:
+          - $ref: '#/components/schemas/PromptSort'
+          description: The field to sort by. To sort in decreasing order, use `-`
+            in front of the field name.
+          default: created_at
+        description: The field to sort by. To sort in decreasing order, use `-` in
+          front of the field name.
+      - in: query
+        name: filter
+        style: deepObject
+        required: false
+        explode: true
+        schema:
+          $ref: '#/components/schemas/PromptFilter'
+        description: Filter prompts by workspace, project, name, description, created_at,
+          and updated_at.
+      responses:
+        '200':
+          description: Return prompts for a workspace
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PromptsPage'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    post:
+      tags:
+      - Prompts
+      summary: Create Prompt
+      description: Create a new prompt.
+      operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreatePromptRequest'
+      responses:
+        '201':
+          description: Create a new prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts/{name}:
+    get:
+      tags:
+      - Prompts
+      summary: Get Prompt
+      description: Get a prompt by workspace and name.
+      operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '200':
+          description: Return prompt details
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    put:
+      tags:
+      - Prompts
+      summary: Update Prompt
+      description: Update an existing prompt (full replacement of mutable fields).
+      operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdatePromptRequest'
+      responses:
+        '200':
+          description: Update an existing prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    delete:
+      tags:
+      - Prompts
+      summary: Delete Prompt
+      description: Delete a prompt by workspace and name.
+      operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '204':
+          description: Delete a prompt
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
   /apis/models/v2/workspaces/{workspace}/providers:
     get:
       tags:
@@ -8204,6 +8400,24 @@ components:
       - role
       title: ChatCompletionSystemMessageParam
       description: System message parameter for chat completion.
+    ChatCompletionTool:
+      properties:
+        type:
+          type: string
+          const: function
+          title: Type
+          description: The type of the tool. Currently only 'function' is supported.
+          default: function
+        function:
+          allOf:
+          - $ref: '#/components/schemas/FunctionDefinition'
+          description: The function definition for this tool.
+      type: object
+      required:
+      - function
+      title: ChatCompletionTool
+      description: An OpenAI-compatible tool definition (currently always a function
+        tool).
     ChatCompletionToolMessageParam:
       properties:
         content:
@@ -8910,6 +9124,65 @@ components:
       - source
       title: CreatePlatformJobRequest
       description: Request model for creating a new platform job.
+    CreatePromptRequest:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z),
+            digits (0-9), underscores, hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      required:
+      - name
+      title: CreatePromptRequest
+      description: Request model for creating a Prompt.
     CreateVirtualModelRequest:
       properties:
         default_model_entity:
@@ -10512,6 +10785,39 @@ components:
       - name
       title: FunctionCall
       description: Function call information.
+    FunctionDefinition:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          title: Name
+          description: The name of the function to be called.
+        description:
+          title: Description
+          description: A description of what the function does, used by the model
+            to decide when and how to call it.
+          type: string
+        parameters:
+          title: Parameters
+          description: The parameters the function accepts, described as a JSON Schema
+            object.
+          additionalProperties: true
+          type: object
+        strict:
+          title: Strict
+          description: Whether to enforce strict schema adherence when generating
+            the function call.
+          type: boolean
+      type: object
+      required:
+      - name
+      title: FunctionDefinition
+      description: 'An OpenAI-compatible function definition for tool calling.
+
+
+        Mirrors the ``function`` object the Inference Gateway forwards to
+
+        OpenAI-compatible backends.'
     GLiNERDetection:
       properties:
         server_endpoint:
@@ -15064,6 +15370,110 @@ components:
       required:
       - data
       title: ProjectsPage
+    Prompt:
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique identifier for the prompt.
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the entity. Name/workspace combo must be unique across
+            all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores,
+            hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        workspace:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Workspace
+          description: 'The workspace of the entity. Allowed characters: letters (a-z,
+            A-Z), digits (0-9), underscores, hyphens, and dots.'
+        project:
+          title: Project
+          description: The URN of the project associated with this entity.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        created_at:
+          type: string
+          format: date-time
+          title: Created At
+          description: The timestamp of prompt creation
+        updated_at:
+          type: string
+          format: date-time
+          title: Updated At
+          description: The timestamp of the last prompt update
+        description:
+          title: Description
+          description: Optional description of the prompt.
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+          description: Ordered list of chat messages that make up the prompt.
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+          description: Names of the Jinja2 template variables the prompt expects.
+        tools:
+          title: Tools
+          description: Optional OpenAI-compatible tool definitions to send with the
+            prompt.
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+          description: 'Controls which (if any) tool is called: ''none'', ''auto'',
+            ''required'', or a named-tool object.'
+        response_format:
+          title: Response Format
+          description: Optional OpenAI-compatible response_format, e.g. a json_schema
+            structured-output spec.
+          additionalProperties: true
+          type: object
+        inference_params:
+          allOf:
+          - $ref: '#/components/schemas/InferenceParams'
+          description: Optional default model and sampling parameters (temperature,
+            top_p, max_tokens, ...).
+        tags:
+          items:
+            type: string
+          type: array
+          title: Tags
+          description: Optional free-form tags for organizing prompts.
+      type: object
+      required:
+      - name
+      - workspace
+      - created_at
+      - updated_at
+      title: Prompt
+      description: 'A reusable, stored chat prompt.
+
+
+        A Prompt captures the messages, declared template variables, optional tool
+
+        definitions, and default inference parameters needed to invoke a model
+
+        through the Inference Gateway. The unique identifier is workspace/name.'
     PromptData:
       properties:
         system_prompt:
@@ -15086,6 +15496,106 @@ components:
       type: object
       title: PromptData
       description: Configuration for prompt engineering.
+    PromptFilter:
+      additionalProperties: false
+      description: Filter for Prompt queries.
+      properties:
+        workspace:
+          description: Filter by workspace.
+          title: Workspace
+          type: string
+        project:
+          description: Filter by project URN.
+          title: Project
+          type: string
+        name:
+          description: Filter by name.
+          title: Name
+          type: string
+        description:
+          description: Filter by description.
+          title: Description
+          type: string
+        created_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by creation date.
+        updated_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by update date.
+      title: PromptFilter
+      type: object
+    PromptMessage:
+      properties:
+        role:
+          allOf:
+          - $ref: '#/components/schemas/PromptMessageRole'
+          description: The role of the message author.
+        content:
+          type: string
+          title: Content
+          description: Templated message content. May contain template variables.
+      type: object
+      required:
+      - role
+      - content
+      title: PromptMessage
+      description: 'A single templated message in a chat prompt.
+
+
+        ``content`` is a Jinja2 template body that may reference the prompt''s
+
+        declared ``input_variables`` (e.g. ``{{ topic }}``).'
+    PromptMessageRole:
+      type: string
+      enum:
+      - system
+      - developer
+      - user
+      - assistant
+      title: PromptMessageRole
+      description: 'Role of a message author in a chat prompt.
+
+
+        Follows the OpenAI chat schema the Inference Gateway speaks
+
+        (``/v1/chat/completions``).'
+    PromptSort:
+      type: string
+      enum:
+      - name
+      - -name
+      - created_at
+      - -created_at
+      - updated_at
+      - -updated_at
+      title: PromptSort
+      description: Sort fields for Prompt queries.
+    PromptsPage:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/Prompt'
+          type: array
+          title: Data
+        pagination:
+          allOf:
+          - $ref: '#/components/schemas/PaginationData'
+          description: Pagination information.
+        sort:
+          title: Sort
+          description: The field on which the results are sorted.
+          type: string
+        filter:
+          title: Filter
+          description: Filtering information.
+          additionalProperties: true
+          type: object
+      type: object
+      required:
+      - data
+      title: PromptsPage
     RailStatus:
       properties:
         status:
@@ -16870,6 +17380,56 @@ components:
 
 
         This endpoint supports partial updates for fields managed by Models Controller.'
+    UpdatePromptRequest:
+      properties:
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      title: UpdatePromptRequest
+      description: 'Request model for replacing a Prompt''s mutable fields (full update).
+
+
+        The prompt name and workspace come from the URL path and cannot be changed.'
     UpdateVirtualModelRequest:
       properties:
         default_model_entity:
diff --git a/openapi/openapi.yaml b/openapi/openapi.yaml
index b0ebfd2e13..96e7ade804 100644
--- a/openapi/openapi.yaml
+++ b/openapi/openapi.yaml
@@ -6579,6 +6579,202 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts:
+    get:
+      tags:
+      - Prompts
+      summary: List Prompts By Workspace
+      description: List prompts for a specific workspace.
+      operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: page
+        in: query
+        required: false
+        schema:
+          type: integer
+          description: Page number.
+          default: 1
+          title: Page
+        description: Page number.
+      - name: page_size
+        in: query
+        required: false
+        schema:
+          type: integer
+          description: Page size.
+          default: 100
+          title: Page Size
+        description: Page size.
+      - name: sort
+        in: query
+        required: false
+        schema:
+          allOf:
+          - $ref: '#/components/schemas/PromptSort'
+          description: The field to sort by. To sort in decreasing order, use `-`
+            in front of the field name.
+          default: created_at
+        description: The field to sort by. To sort in decreasing order, use `-` in
+          front of the field name.
+      - in: query
+        name: filter
+        style: deepObject
+        required: false
+        explode: true
+        schema:
+          $ref: '#/components/schemas/PromptFilter'
+        description: Filter prompts by workspace, project, name, description, created_at,
+          and updated_at.
+      responses:
+        '200':
+          description: Return prompts for a workspace
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PromptsPage'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    post:
+      tags:
+      - Prompts
+      summary: Create Prompt
+      description: Create a new prompt.
+      operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreatePromptRequest'
+      responses:
+        '201':
+          description: Create a new prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts/{name}:
+    get:
+      tags:
+      - Prompts
+      summary: Get Prompt
+      description: Get a prompt by workspace and name.
+      operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '200':
+          description: Return prompt details
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    put:
+      tags:
+      - Prompts
+      summary: Update Prompt
+      description: Update an existing prompt (full replacement of mutable fields).
+      operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdatePromptRequest'
+      responses:
+        '200':
+          description: Update an existing prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    delete:
+      tags:
+      - Prompts
+      summary: Delete Prompt
+      description: Delete a prompt by workspace and name.
+      operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '204':
+          description: Delete a prompt
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
   /apis/models/v2/workspaces/{workspace}/providers:
     get:
       tags:
@@ -8204,6 +8400,24 @@ components:
       - role
       title: ChatCompletionSystemMessageParam
       description: System message parameter for chat completion.
+    ChatCompletionTool:
+      properties:
+        type:
+          type: string
+          const: function
+          title: Type
+          description: The type of the tool. Currently only 'function' is supported.
+          default: function
+        function:
+          allOf:
+          - $ref: '#/components/schemas/FunctionDefinition'
+          description: The function definition for this tool.
+      type: object
+      required:
+      - function
+      title: ChatCompletionTool
+      description: An OpenAI-compatible tool definition (currently always a function
+        tool).
     ChatCompletionToolMessageParam:
       properties:
         content:
@@ -8910,6 +9124,65 @@ components:
       - source
       title: CreatePlatformJobRequest
       description: Request model for creating a new platform job.
+    CreatePromptRequest:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z),
+            digits (0-9), underscores, hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      required:
+      - name
+      title: CreatePromptRequest
+      description: Request model for creating a Prompt.
     CreateVirtualModelRequest:
       properties:
         default_model_entity:
@@ -10512,6 +10785,39 @@ components:
       - name
       title: FunctionCall
       description: Function call information.
+    FunctionDefinition:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          title: Name
+          description: The name of the function to be called.
+        description:
+          title: Description
+          description: A description of what the function does, used by the model
+            to decide when and how to call it.
+          type: string
+        parameters:
+          title: Parameters
+          description: The parameters the function accepts, described as a JSON Schema
+            object.
+          additionalProperties: true
+          type: object
+        strict:
+          title: Strict
+          description: Whether to enforce strict schema adherence when generating
+            the function call.
+          type: boolean
+      type: object
+      required:
+      - name
+      title: FunctionDefinition
+      description: 'An OpenAI-compatible function definition for tool calling.
+
+
+        Mirrors the ``function`` object the Inference Gateway forwards to
+
+        OpenAI-compatible backends.'
     GLiNERDetection:
       properties:
         server_endpoint:
@@ -15064,6 +15370,110 @@ components:
       required:
       - data
       title: ProjectsPage
+    Prompt:
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique identifier for the prompt.
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the entity. Name/workspace combo must be unique across
+            all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores,
+            hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        workspace:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Workspace
+          description: 'The workspace of the entity. Allowed characters: letters (a-z,
+            A-Z), digits (0-9), underscores, hyphens, and dots.'
+        project:
+          title: Project
+          description: The URN of the project associated with this entity.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        created_at:
+          type: string
+          format: date-time
+          title: Created At
+          description: The timestamp of prompt creation
+        updated_at:
+          type: string
+          format: date-time
+          title: Updated At
+          description: The timestamp of the last prompt update
+        description:
+          title: Description
+          description: Optional description of the prompt.
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+          description: Ordered list of chat messages that make up the prompt.
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+          description: Names of the Jinja2 template variables the prompt expects.
+        tools:
+          title: Tools
+          description: Optional OpenAI-compatible tool definitions to send with the
+            prompt.
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+          description: 'Controls which (if any) tool is called: ''none'', ''auto'',
+            ''required'', or a named-tool object.'
+        response_format:
+          title: Response Format
+          description: Optional OpenAI-compatible response_format, e.g. a json_schema
+            structured-output spec.
+          additionalProperties: true
+          type: object
+        inference_params:
+          allOf:
+          - $ref: '#/components/schemas/InferenceParams'
+          description: Optional default model and sampling parameters (temperature,
+            top_p, max_tokens, ...).
+        tags:
+          items:
+            type: string
+          type: array
+          title: Tags
+          description: Optional free-form tags for organizing prompts.
+      type: object
+      required:
+      - name
+      - workspace
+      - created_at
+      - updated_at
+      title: Prompt
+      description: 'A reusable, stored chat prompt.
+
+
+        A Prompt captures the messages, declared template variables, optional tool
+
+        definitions, and default inference parameters needed to invoke a model
+
+        through the Inference Gateway. The unique identifier is workspace/name.'
     PromptData:
       properties:
         system_prompt:
@@ -15086,6 +15496,106 @@ components:
       type: object
       title: PromptData
       description: Configuration for prompt engineering.
+    PromptFilter:
+      additionalProperties: false
+      description: Filter for Prompt queries.
+      properties:
+        workspace:
+          description: Filter by workspace.
+          title: Workspace
+          type: string
+        project:
+          description: Filter by project URN.
+          title: Project
+          type: string
+        name:
+          description: Filter by name.
+          title: Name
+          type: string
+        description:
+          description: Filter by description.
+          title: Description
+          type: string
+        created_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by creation date.
+        updated_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by update date.
+      title: PromptFilter
+      type: object
+    PromptMessage:
+      properties:
+        role:
+          allOf:
+          - $ref: '#/components/schemas/PromptMessageRole'
+          description: The role of the message author.
+        content:
+          type: string
+          title: Content
+          description: Templated message content. May contain template variables.
+      type: object
+      required:
+      - role
+      - content
+      title: PromptMessage
+      description: 'A single templated message in a chat prompt.
+
+
+        ``content`` is a Jinja2 template body that may reference the prompt''s
+
+        declared ``input_variables`` (e.g. ``{{ topic }}``).'
+    PromptMessageRole:
+      type: string
+      enum:
+      - system
+      - developer
+      - user
+      - assistant
+      title: PromptMessageRole
+      description: 'Role of a message author in a chat prompt.
+
+
+        Follows the OpenAI chat schema the Inference Gateway speaks
+
+        (``/v1/chat/completions``).'
+    PromptSort:
+      type: string
+      enum:
+      - name
+      - -name
+      - created_at
+      - -created_at
+      - updated_at
+      - -updated_at
+      title: PromptSort
+      description: Sort fields for Prompt queries.
+    PromptsPage:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/Prompt'
+          type: array
+          title: Data
+        pagination:
+          allOf:
+          - $ref: '#/components/schemas/PaginationData'
+          description: Pagination information.
+        sort:
+          title: Sort
+          description: The field on which the results are sorted.
+          type: string
+        filter:
+          title: Filter
+          description: Filtering information.
+          additionalProperties: true
+          type: object
+      type: object
+      required:
+      - data
+      title: PromptsPage
     RailStatus:
       properties:
         status:
@@ -16870,6 +17380,56 @@ components:
 
 
         This endpoint supports partial updates for fields managed by Models Controller.'
+    UpdatePromptRequest:
+      properties:
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      title: UpdatePromptRequest
+      description: 'Request model for replacing a Prompt''s mutable fields (full update).
+
+
+        The prompt name and workspace come from the URL path and cannot be changed.'
     UpdateVirtualModelRequest:
       properties:
         default_model_entity:
diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml
index d8a24cd46b..e000ff3ae4 100644
--- a/sdk/stainless.yaml
+++ b/sdk/stainless.yaml
@@ -462,6 +462,24 @@ resources:
           update: put /apis/models/v2/workspaces/{workspace}/providers/{name}
           delete: delete /apis/models/v2/workspaces/{workspace}/providers/{name}
           update_status: put /apis/models/v2/workspaces/{workspace}/providers/{name}/status
+      prompts:
+        models:
+          chat_completion_tool: ChatCompletionTool
+          create_prompt_request: CreatePromptRequest
+          function_definition: FunctionDefinition
+          prompt: Prompt
+          prompt_filter: PromptFilter
+          prompt_message: PromptMessage
+          prompt_message_role: PromptMessageRole
+          prompt_sort: PromptSort
+          prompts_page: PromptsPage
+          update_prompt_request: UpdatePromptRequest
+        methods:
+          list: get /apis/models/v2/workspaces/{workspace}/prompts
+          create: post /apis/models/v2/workspaces/{workspace}/prompts
+          retrieve: get /apis/models/v2/workspaces/{workspace}/prompts/{name}
+          update: put /apis/models/v2/workspaces/{workspace}/prompts/{name}
+          delete: delete /apis/models/v2/workspaces/{workspace}/prompts/{name}
       gateway:
         subresources:
           openai:
diff --git a/services/core/models/src/nmp/core/models/api/dependencies.py b/services/core/models/src/nmp/core/models/api/dependencies.py
index d43192cb6e..b50a2ade03 100644
--- a/services/core/models/src/nmp/core/models/api/dependencies.py
+++ b/services/core/models/src/nmp/core/models/api/dependencies.py
@@ -12,6 +12,7 @@
 from nmp.core.models.api.service.model_deployment_service import ModelDeploymentService
 from nmp.core.models.api.service.model_entity_service import ModelEntityService
 from nmp.core.models.api.service.model_provider_service import ModelProviderService
+from nmp.core.models.api.service.prompt_service import PromptService
 
 
 def get_model_entity_service(
@@ -35,6 +36,13 @@ def get_model_provider_service(
     return ModelProviderService(entity_client)
 
 
+def get_prompt_service(
+    entity_client: EntityClient = Depends(get_entity_client),
+) -> PromptService:
+    """Dependency provider: build a PromptService around the injected EntityClient."""
+    return PromptService(entity_client)
+
+
 def get_model_deployment_config_service(
     entity_client: EntityClient = Depends(get_entity_client),
 ) -> ModelDeploymentConfigService:
diff --git a/services/core/models/src/nmp/core/models/api/service/prompt_service.py b/services/core/models/src/nmp/core/models/api/service/prompt_service.py
new file mode 100644
index 0000000000..4754f91efb
--- /dev/null
+++ b/services/core/models/src/nmp/core/models/api/service/prompt_service.py
@@ -0,0 +1,175 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Service layer for Prompt operations using EntityClient."""
+
+import logging
+
+from nmp.common.api.common import Page, PaginationData
+from nmp.common.api.filter import FilterOperation
+from nmp.common.entities.client import EntityClient, EntityConflictError, EntityNotFoundError
+from nmp.core.models.entities import Prompt as PromptEntity
+from nmp.core.models.schemas import (
+    CreatePromptRequest,
+    DeletePromptRequest,
+    GetPromptRequest,
+    Prompt,
+    UpdatePromptRequest,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _entity_to_schema(entity: PromptEntity) -> Prompt:
+    """Translate a stored prompt entity into the ``Prompt`` API schema.
+
+    The conversion is a straight one-to-one copy: each attribute named below is
+    read from ``entity`` and handed to the ``Prompt`` constructor under the same
+    keyword, without any transformation.
+
+    Args:
+        entity: The prompt entity to convert.
+
+    Returns:
+        A ``Prompt`` schema instance populated from ``entity``.
+    """
+    identity = ("id", "name", "workspace", "project", "description")
+    body = ("messages", "input_variables", "tools", "tool_choice", "response_format", "inference_params", "tags")
+    audit = ("created_at", "updated_at")
+    mirrored = (*identity, *body, *audit)
+    return Prompt(**{attr: getattr(entity, attr) for attr in mirrored})
+
+
+class PromptService:
+    """Service layer for Prompt operations."""
+
+    def __init__(self, entity_client: EntityClient):
+        self.entity_client = entity_client
+
+    async def create_prompt(self, request: CreatePromptRequest, workspace: str) -> Prompt:
+        """Create a new prompt. Raises ValueError if the name is already taken in the workspace."""
+        log_extra = {"workspace": workspace, "prompt_name": request.name}
+        conflict_msg = f"Prompt with name '{request.name}' already exists in workspace '{workspace}'"
+        logger.debug("Creating prompt", extra=log_extra)
+
+        try:
+            await self.entity_client.get(PromptEntity, name=request.name, workspace=workspace)
+        except EntityNotFoundError:
+            pass  # Expected - prompt doesn't exist, proceed with creation
+        else:
+            logger.warning("Prompt already exists", extra=log_extra)
+            raise ValueError(conflict_msg)
+
+        entity = PromptEntity(
+            name=request.name,
+            workspace=workspace,
+            project=request.project,
+            description=request.description,
+            messages=request.messages,
+            input_variables=request.input_variables,
+            tools=request.tools,
+            tool_choice=request.tool_choice,
+            response_format=request.response_format,
+            inference_params=request.inference_params,
+            tags=request.tags or [],
+        )
+
+        try:
+            created = await self.entity_client.create(entity)
+        except EntityConflictError as e:
+            # The lookup above is only a fast path; a concurrent create can still collide here.
+            logger.warning("Prompt already exists (conflict)", extra=log_extra)
+            raise ValueError(conflict_msg) from e
+        logger.info("Prompt created", extra={"workspace": created.workspace, "prompt_name": created.name})
+        return _entity_to_schema(created)
+
+    async def get_prompt(self, request: GetPromptRequest) -> Prompt | None:
+        """Get a prompt by workspace and name. Returns None if not found."""
+        log_extra = {"workspace": request.workspace, "prompt_name": request.name}
+        logger.debug("Getting prompt", extra=log_extra)
+        try:
+            entity = await self.entity_client.get(PromptEntity, workspace=request.workspace, name=request.name)
+        except EntityNotFoundError:
+            logger.debug("Prompt not found", extra=log_extra)
+            return None
+        return _entity_to_schema(entity)
+
+    async def list_prompts(
+        self,
+        workspace: str,
+        page: int = 1,
+        page_size: int = 100,
+        sort: str | None = None,
+        filter_operation: FilterOperation | None = None,
+    ) -> Page[Prompt]:
+        """List prompts in a workspace with filtering, sorting and pagination (``filter`` is echoed as None)."""
+        log_extra = {"workspace": workspace, "page": page, "page_size": page_size, "sort": sort}
+        logger.debug("Listing prompts", extra=log_extra)
+
+        result = await self.entity_client.list(
+            PromptEntity,
+            workspace=workspace,
+            filter_operation=filter_operation,
+            sort=sort,
+            page=page,
+            page_size=page_size,
+        )
+        prompts = [_entity_to_schema(entity) for entity in result.data]
+
+        return Page(
+            data=prompts,
+            pagination=PaginationData(
+                page=result.pagination.page,
+                page_size=result.pagination.page_size,
+                current_page_size=len(prompts),
+                total_pages=result.pagination.total_pages,
+                total_results=result.pagination.total_results,
+            ),
+            sort=sort,
+            filter=None,
+        )
+
+    async def update_prompt(self, workspace: str, name: str, request: UpdatePromptRequest) -> Prompt | None:
+        """Replace a prompt's mutable fields (full update). Returns None if not found."""
+        log_extra = {"workspace": workspace, "prompt_name": name}
+        logger.debug("Updating prompt", extra=log_extra)
+
+        try:
+            entity = await self.entity_client.get(PromptEntity, workspace=workspace, name=name)
+        except EntityNotFoundError:
+            logger.warning("Prompt not found for update", extra=log_extra)
+            return None
+
+        entity.project = request.project
+        entity.description = request.description
+        entity.messages = request.messages
+        entity.input_variables = request.input_variables
+        entity.tools = request.tools
+        entity.tool_choice = request.tool_choice
+        entity.response_format = request.response_format
+        entity.inference_params = request.inference_params
+        if request.tags is not None:  # tags=None keeps the stored tags instead of clearing them
+            entity.tags = request.tags
+
+        try:
+            updated = await self.entity_client.update(entity)
+        except EntityNotFoundError:
+            # If the client reports the prompt gone at write time (e.g. deleted concurrently), treat as not found.
+            logger.warning("Prompt not found for update", extra=log_extra)
+            return None
+        logger.info("Prompt updated", extra={"workspace": updated.workspace, "prompt_name": updated.name})
+        return _entity_to_schema(updated)
+
+    async def delete_prompt(self, request: DeletePromptRequest) -> bool:
+        """Delete a prompt by workspace and name. Returns False if not found."""
+        log_extra = {"workspace": request.workspace, "prompt_name": request.name}
+        logger.debug("Deleting prompt", extra=log_extra)
+        try:
+            await self.entity_client.get(PromptEntity, workspace=request.workspace, name=request.name)
+            await self.entity_client.delete(PromptEntity, request.name, workspace=request.workspace)
+        except EntityNotFoundError:
+            # Covers the lookup and also a delete that loses a race with another deleter.
+            logger.warning("Prompt not found for deletion", extra=log_extra)
+            return False
+        logger.info("Prompt deleted", extra=log_extra)
+        return True
diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py
new file mode 100644
index 0000000000..84cbde3e73
--- /dev/null
+++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py
@@ -0,0 +1,186 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
+from nmp.common.api.common import Page
+from nmp.common.api.parsed_filter import ParsedFilter, make_filter_dep
+from nmp.common.api.utils import generate_openapi_extra_params
+from nmp.common.entities.client import EntityValidationError
+from nmp.core.models.api.dependencies import get_prompt_service
+from nmp.core.models.api.service.prompt_service import PromptService
+from nmp.core.models.schemas import (
+    CreatePromptRequest,
+    DeletePromptRequest,
+    GetPromptRequest,
+    Prompt,
+    PromptFilter,
+    PromptSort,
+    UpdatePromptRequest,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+@router.get(
+    "/v2/workspaces/{workspace}/prompts",
+    summary="List Prompts By Workspace",
+    response_description="Return prompts for a workspace",
+    status_code=status.HTTP_200_OK,
+    response_model=Page[Prompt],
+    response_model_exclude_none=True,
+    openapi_extra=generate_openapi_extra_params(
+        filter_schema=PromptFilter,
+        filter_description=("Filter prompts by workspace, project, name, description, created_at, and updated_at."),
+    ),
+)
+async def list_prompts(
+    workspace: str,
+    page: int = Query(default=1, description="Page number."),
+    page_size: int = Query(default=100, description="Page size."),
+    sort: PromptSort = Query(
+        default=PromptSort.CREATED_AT_ASC,
+        description="The field to sort by. To sort in decreasing order, use `-` in front of the field name.",
+    ),
+    parsed_filter: ParsedFilter = Depends(make_filter_dep(PromptFilter)),
+    service: PromptService = Depends(get_prompt_service),
+) -> Page[Prompt]:
+    """List prompts for a specific workspace."""
+    # NOTE(review): filter[workspace] overrides the path workspace; confirm this cannot bypass path-scoped authz.
+    effective_workspace = parsed_filter.remove("workspace") or workspace
+    try:
+        return await service.list_prompts(
+            workspace=effective_workspace,
+            page=page,
+            page_size=page_size,
+            sort=sort,
+            filter_operation=parsed_filter.operation,
+        )
+    except Exception as e:
+        logger.exception("Failed to list prompts for workspace %s", effective_workspace)
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e
+
+
+@router.post(
+    "/v2/workspaces/{workspace}/prompts",
+    summary="Create Prompt",
+    response_description="Create a new prompt",
+    status_code=status.HTTP_201_CREATED,
+)
+async def create_prompt(
+    workspace: str,
+    request: CreatePromptRequest,
+    service: PromptService = Depends(get_prompt_service),
+) -> Prompt:
+    """Create a new prompt."""
+    logger.info("Creating prompt: %s/%s", workspace, request.name)
+    try:
+        return await service.create_prompt(request, workspace)
+    except EntityValidationError as e:
+        logger.warning("Entity store validation error during prompt creation: %s", e)
+        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) from e
+    except ValueError as e:
+        if "already exists" in str(e).lower():  # the service signals name conflicts via the ValueError text
+            logger.warning("Prompt already exists: %s/%s", workspace, request.name)
+            raise HTTPException(
+                status_code=status.HTTP_409_CONFLICT,
+                detail=f"Prompt with workspace '{workspace}' and name '{request.name}' already exists",
+            ) from e
+        logger.warning("Prompt creation validation error: %s", e)
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
+    except Exception as e:
+        logger.exception("Failed to create prompt")
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e
+
+
+@router.get(
+    "/v2/workspaces/{workspace}/prompts/{name}",
+    summary="Get Prompt",
+    response_description="Return prompt details",
+    status_code=status.HTTP_200_OK,
+)
+async def get_prompt(
+    workspace: str,
+    name: str,
+    service: PromptService = Depends(get_prompt_service),
+) -> Prompt:
+    """Get a prompt by workspace and name."""
+    logger.debug("Getting prompt: %s/%s", workspace, name)
+    # Validate path values first: a pydantic failure (a ValueError) is a client error, not a 500.
+    try:
+        lookup = GetPromptRequest(workspace=workspace, name=name)
+    except ValueError as e:
+        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) from e
+    try:
+        prompt = await service.get_prompt(lookup)
+    except Exception as e:
+        logger.exception("Failed to get prompt %s/%s", workspace, name)
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e
+    if prompt is None:
+        logger.warning("Prompt not found: %s/%s", workspace, name)
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Prompt not found: {workspace}/{name}")
+    return prompt
+
+
+@router.put(
+    "/v2/workspaces/{workspace}/prompts/{name}",
+    summary="Update Prompt",
+    response_description="Update an existing prompt",
+    status_code=status.HTTP_200_OK,
+)
+async def update_prompt(
+    workspace: str,
+    name: str,
+    request: UpdatePromptRequest,
+    service: PromptService = Depends(get_prompt_service),
+) -> Prompt:
+    """Update an existing prompt (full replacement of mutable fields)."""
+    logger.debug("Updating prompt: %s/%s", workspace, name)
+    try:
+        prompt = await service.update_prompt(workspace, name, request)
+    except EntityValidationError as e:
+        logger.warning("Entity store validation error during prompt update: %s", e)
+        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) from e
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception("Failed to update prompt")
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e
+    if prompt is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Prompt not found: {workspace}/{name}",
+        )
+    return prompt
+
+
+@router.delete(
+    "/v2/workspaces/{workspace}/prompts/{name}",
+    summary="Delete Prompt",
+    response_description="Delete a prompt",
+    status_code=status.HTTP_204_NO_CONTENT,
+)
+async def delete_prompt(
+    workspace: str,
+    name: str,
+    service: PromptService = Depends(get_prompt_service),
+):
+    """Delete a prompt by workspace and name."""
+    logger.info("Deleting prompt: %s/%s", workspace, name)
+    # Validate path values first: a pydantic failure (a ValueError) is a client error, not a 500.
+    try:
+        target = DeletePromptRequest(workspace=workspace, name=name)
+    except ValueError as e:
+        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) from e
+    try:
+        deleted = await service.delete_prompt(target)
+    except Exception as e:
+        logger.exception("Failed to delete prompt %s/%s", workspace, name)
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) from e
+    if not deleted:
+        logger.warning("Prompt not found for deletion: %s/%s", workspace, name)
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Prompt not found: {workspace}/{name}")
+    return None
diff --git a/services/core/models/src/nmp/core/models/entities.py b/services/core/models/src/nmp/core/models/entities.py
index 375395ca20..18bbdd2767 100644
--- a/services/core/models/src/nmp/core/models/entities.py
+++ b/services/core/models/src/nmp/core/models/entities.py
@@ -8,6 +8,7 @@
 from nmp.common.auth import AuthContext
 from nmp.common.entities import constants
 from nmp.common.entities.client import EntityBase
+from nmp.common.inference import InferenceParams
 from nmp.core.models.constants import (
     MODEL_REF_MAX_LEN,
     MODEL_REF_PATTERN_DESCRIPTION,
@@ -16,6 +17,7 @@
 from nmp.core.models.schemas import (
     APIEndpointData,
     BackendFormat,
+    ChatCompletionTool,
     FinetuningType,
     Lora,
     ModelDeploymentStatus,
@@ -23,6 +25,7 @@
     ModelSpec,
     NIMDeployment,
     PromptData,
+    PromptMessage,
     ServedModelMapping,
 )
 from pydantic import Field, PrivateAttr, computed_field, field_validator, model_validator
@@ -341,3 +344,53 @@ class ModelDeploymentConfig(EntityBase):
         description="Optional description of the deployment configuration.",
         max_length=1000,
     )
+
+
+class Prompt(EntityBase):
+    """A reusable, stored chat prompt, addressed by workspace/name.
+
+    Captures the messages, declared template variables, optional tool definitions,
+    and default inference parameters needed to invoke a model through the
+    Inference Gateway.
+    """
+    # id/name/workspace/created_at/updated_at are not declared here; presumably inherited from EntityBase — confirm.
+    __entity_type__: ClassVar[str] = "prompt"  # presumably the entity-store type key — confirm
+
+    project: str | None = Field(
+        default=None,
+        description="The URN of the project associated with this prompt.",
+        max_length=constants.MAX_LENGTH_255,
+    )
+    description: str | None = Field(
+        default=None,
+        description="Optional description of the prompt.",
+        max_length=1000,
+    )
+    messages: list[PromptMessage] = Field(
+        default_factory=list,
+        description="Ordered list of chat messages that make up the prompt.",
+    )
+    input_variables: list[str] = Field(
+        default_factory=list,
+        description="Names of the Jinja2 template variables the prompt expects.",
+    )
+    tools: list[ChatCompletionTool] | None = Field(
+        default=None,
+        description="Optional OpenAI-compatible tool definitions to send with the prompt.",
+    )
+    tool_choice: str | dict[str, Any] | None = Field(
+        default=None,
+        description="Controls which (if any) tool is called: 'none', 'auto', 'required', or a named-tool object.",
+    )
+    response_format: dict[str, Any] | None = Field(
+        default=None,
+        description="Optional OpenAI-compatible response_format, e.g. a json_schema structured-output spec.",
+    )
+    inference_params: InferenceParams | None = Field(
+        default=None,
+        description="Optional default model and sampling parameters (temperature, top_p, max_tokens, ...).",
+    )
+    tags: list[str] = Field(
+        default_factory=list,
+        description="Optional free-form tags for organizing prompts.",
+    )
diff --git a/services/core/models/src/nmp/core/models/schemas.py b/services/core/models/src/nmp/core/models/schemas.py
index 889f62d9a3..47e060ba49 100644
--- a/services/core/models/src/nmp/core/models/schemas.py
+++ b/services/core/models/src/nmp/core/models/schemas.py
@@ -4,7 +4,7 @@
 from abc import ABC
 from datetime import datetime
 from enum import Enum, StrEnum
-from typing import Annotated, Any, Dict, List, Optional, Union
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
 
 from jinja2 import Environment
 from jinja2 import nodes as jinja_nodes
@@ -699,6 +699,205 @@ class DeleteModelProviderRequest(BaseModel):
     )
 
 
+# ============================================================================
+# Prompt Schemas
+# ============================================================================
+
+
+class PromptMessageRole(StrEnum):
+    """Role of a message author in a chat prompt.
+
+    Follows the OpenAI chat schema the Inference Gateway speaks
+    (``/v1/chat/completions``).
+    """
+    # Closed set: PromptMessage.role is typed with this enum, so only these four strings validate.
+    SYSTEM = "system"
+    DEVELOPER = "developer"
+    USER = "user"
+    ASSISTANT = "assistant"
+
+
+class PromptMessage(BaseModel):
+    """A single templated message in a chat prompt.
+
+    ``content`` is a Jinja2 template body that may reference the prompt's
+    declared ``input_variables`` (e.g. ``{{ topic }}``).
+    """
+    # No validators are defined on this class: content is kept as given, with no rendering or syntax check.
+    role: PromptMessageRole = Field(description="The role of the message author.")
+    content: str = Field(description="Templated message content. May contain template variables.")
+
+
+class FunctionDefinition(BaseModel):
+    """An OpenAI-compatible function definition for tool calling.
+
+    Mirrors the ``function`` object the Inference Gateway forwards to
+    OpenAI-compatible backends.
+    """
+    # Only ``name`` is required; ``parameters`` is a free-form dict and is not checked as JSON Schema here.
+    name: str = Field(
+        description="The name of the function to be called.",
+        max_length=constants.MAX_LENGTH_255,
+    )
+    description: Optional[str] = Field(
+        default=None,
+        description="A description of what the function does, used by the model to decide when and how to call it.",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="The parameters the function accepts, described as a JSON Schema object.",
+    )
+    strict: Optional[bool] = Field(
+        default=None,
+        description="Whether to enforce strict schema adherence when generating the function call.",
+    )
+
+
+class ChatCompletionTool(BaseModel):
+    """An OpenAI-compatible tool definition (currently always a function tool)."""
+    # ``type`` is pinned by Literal["function"], so any other tool type fails validation.
+    type: Literal["function"] = Field(
+        default="function",
+        description="The type of the tool. Currently only 'function' is supported.",
+    )
+    function: FunctionDefinition = Field(description="The function definition for this tool.")
+
+
+class Prompt(ModelEntityBaseModel):
+    """A reusable, stored chat prompt.
+
+    A Prompt captures the messages, declared template variables, optional tool
+    definitions, and default inference parameters needed to invoke a model
+    through the Inference Gateway. The unique identifier is workspace/name.
+    """
+    # name/workspace/project/created_at/updated_at are not declared here; presumably from the base class — confirm.
+    id: str = Field(
+        default_factory=lambda: get_model_id("prompt"),
+        description="Unique identifier for the prompt.",
+    )
+    description: Optional[str] = Field(
+        default=None,
+        description="Optional description of the prompt.",
+        max_length=1000,
+    )
+    messages: List[PromptMessage] = Field(
+        default_factory=list,
+        description="Ordered list of chat messages that make up the prompt.",
+    )
+    input_variables: List[str] = Field(
+        default_factory=list,
+        description="Names of the Jinja2 template variables the prompt expects.",
+    )
+    tools: Optional[List[ChatCompletionTool]] = Field(
+        default=None,
+        description="Optional OpenAI-compatible tool definitions to send with the prompt.",
+    )
+    tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(
+        default=None,
+        description="Controls which (if any) tool is called: 'none', 'auto', 'required', or a named-tool object.",
+    )
+    response_format: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Optional OpenAI-compatible response_format, e.g. a json_schema structured-output spec.",
+    )
+    inference_params: Optional[InferenceParams] = Field(
+        default=None,
+        description="Optional default model and sampling parameters (temperature, top_p, max_tokens, ...).",
+    )
+    tags: List[str] = Field(
+        default_factory=list,
+        description="Optional free-form tags for organizing prompts.",
+    )
+
+
+class PromptSort(StrEnum):
+    """Sort fields for Prompt queries."""
+    # Values are the sort strings the list route forwards to the service; a leading "-" means descending.
+    NAME_ASC = "name"
+    NAME_DESC = "-name"
+    CREATED_AT_ASC = "created_at"
+    CREATED_AT_DESC = "-created_at"
+    UPDATED_AT_ASC = "updated_at"
+    UPDATED_AT_DESC = "-updated_at"
+
+
+class CreatePromptRequest(BaseModel):
+    """Request model for creating a Prompt."""
+    # The workspace is not part of the body; the create route takes it from the URL path.
+    name: str = Field(
+        description=f"Name of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH,
+        examples=["support-bot-system", "summarizer"],
+    )
+    project: Optional[str] = Field(
+        default=None,
+        description="The URN of the project associated with this prompt.",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH_SLASH,
+    )
+    description: Optional[str] = Field(default=None, max_length=1000)
+    messages: List[PromptMessage] = Field(default_factory=list)
+    input_variables: List[str] = Field(default_factory=list)
+    tools: Optional[List[ChatCompletionTool]] = Field(default=None)
+    tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(default=None)
+    response_format: Optional[Dict[str, Any]] = Field(default=None)
+    inference_params: Optional[InferenceParams] = Field(default=None)
+    tags: Optional[List[str]] = Field(default=None)  # None is stored as [] by PromptService.create_prompt
+
+
+class UpdatePromptRequest(BaseModel):
+    """Request model for replacing a Prompt's mutable fields (full update).
+
+    The prompt name and workspace come from the URL path and cannot be changed.
+    """
+    # Unlike the other fields, tags=None does not clear tags: PromptService.update_prompt leaves them unchanged.
+    project: Optional[str] = Field(
+        default=None,
+        description="The URN of the project associated with this prompt.",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH_SLASH,
+    )
+    description: Optional[str] = Field(default=None, max_length=1000)
+    messages: List[PromptMessage] = Field(default_factory=list)
+    input_variables: List[str] = Field(default_factory=list)
+    tools: Optional[List[ChatCompletionTool]] = Field(default=None)
+    tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(default=None)
+    response_format: Optional[Dict[str, Any]] = Field(default=None)
+    inference_params: Optional[InferenceParams] = Field(default=None)
+    tags: Optional[List[str]] = Field(default=None)
+
+
+class GetPromptRequest(BaseModel):
+    """Request model for getting a Prompt."""
+    # Built by the GET route from its path parameters, so these pattern/length checks apply to the URL values.
+    workspace: str = Field(
+        description=f"The workspace of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH,
+    )
+    name: str = Field(
+        description=f"Name of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH,
+    )
+
+
+class DeletePromptRequest(BaseModel):
+    """Request model for deleting a Prompt."""
+    # Built by the DELETE route from its path parameters, so these pattern/length checks apply to the URL values.
+    workspace: str = Field(
+        description=f"The workspace of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH,
+    )
+    name: str = Field(
+        description=f"Name of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}",
+        max_length=constants.MAX_LENGTH_255,
+        pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH,
+    )
+
+
 # ============================================================================
 # Model Entity Schemas
 # ============================================================================
@@ -1442,6 +1641,17 @@ class ListModelDeploymentsRequest(BaseModel):
 # ============================================================================
 
 
+class PromptFilter(Filter):
+    """Filter for Prompt queries."""
+    # The list route pulls ``workspace`` out of the parsed filter itself (it overrides the path workspace).
+    workspace: Optional[str] = Field(None, description="Filter by workspace.")
+    project: Optional[str] = Field(None, description="Filter by project URN.")
+    name: Optional[str] = Field(None, description="Filter by name.")
+    description: Optional[str] = Field(None, description="Filter by description.")
+    created_at: Optional[DatetimeFilter] = Field(None, description="Filter by creation date.")
+    updated_at: Optional[DatetimeFilter] = Field(None, description="Filter by update date.")
+
+
 class ModelProviderFilter(Filter):
     """Filter for ModelProvider queries."""
 
diff --git a/services/core/models/src/nmp/core/models/service.py b/services/core/models/src/nmp/core/models/service.py
index 2ff89fa6ea..0c39e81e02 100644
--- a/services/core/models/src/nmp/core/models/service.py
+++ b/services/core/models/src/nmp/core/models/service.py
@@ -35,7 +35,7 @@ def description(self) -> str:
 
     def get_routers(self) -> List[RouterConfig]:
         """Return routers for the models service."""
-        from nmp.core.models.api.v2 import adapters, deployment_configs, deployments, models, providers
+        from nmp.core.models.api.v2 import adapters, deployment_configs, deployments, models, prompts, providers
 
         return [
             RouterConfig(
@@ -63,6 +63,11 @@ def get_routers(self) -> List[RouterConfig]:
                 tag="ModelProviders",
                 description="Operations related to model providers.",
             ),
+            RouterConfig(
+                prompts.router,
+                tag="Prompts",
+                description="CRUD operations for reusable chat prompt entities.",
+            ),
         ]
 
     def configure_app(self) -> None:
diff --git a/services/core/models/tests/unit/api/test_prompts_api.py b/services/core/models/tests/unit/api/test_prompts_api.py
new file mode 100644
index 0000000000..fd30eabba8
--- /dev/null
+++ b/services/core/models/tests/unit/api/test_prompts_api.py
@@ -0,0 +1,244 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Tests for Prompt API endpoints."""
+
+from datetime import datetime
+from unittest.mock import AsyncMock, Mock
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from nmp.common.api.common import Page, PaginationData
+from nmp.common.entities.client import EntityValidationError
+from nmp.core.models.api.service.prompt_service import PromptService
+from nmp.core.models.api.v2.prompts import router
+from nmp.core.models.schemas import Prompt, PromptMessage, PromptMessageRole
+
+
+@pytest.fixture
+def mock_prompt_service():
+    """Create a mock PromptService."""
+    service = Mock(spec=PromptService)
+    service.list_prompts = AsyncMock()
+    service.get_prompt = AsyncMock()
+    service.create_prompt = AsyncMock()
+    service.update_prompt = AsyncMock()
+    service.delete_prompt = AsyncMock()
+    return service
+
+
+@pytest.fixture
+def test_app(mock_prompt_service):
+    """Create a FastAPI test app with the prompt service dependency overridden."""
+    from nmp.core.models.api.dependencies import get_prompt_service
+
+    app = FastAPI()
+    app.dependency_overrides[get_prompt_service] = lambda: mock_prompt_service
+    app.include_router(router, prefix="/apis/models")
+    return app
+
+
+@pytest.fixture
+def client(test_app):
+    return TestClient(test_app)
+
+
+@pytest.fixture
+def sample_prompt():
+    return Prompt(
+        id="prompt-1",
+        name="summarizer",
+        workspace="default",
+        description="A summarization prompt",
+        messages=[PromptMessage(role=PromptMessageRole.USER, content="Summarize: {{ document }}")],
+        input_variables=["document"],
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+    )
+
+
+@pytest.fixture
+def sample_page(sample_prompt):
+    return Page(
+        data=[sample_prompt],
+        pagination=PaginationData(
+            page=1,
+            page_size=100,
+            current_page_size=1,
+            total_results=1,
+            total_pages=1,
+        ),
+        sort="created_at",
+        filter=None,
+    )
+
+
+def test_list_prompts_default_parameters(client, mock_prompt_service, sample_page):
+    mock_prompt_service.list_prompts.return_value = sample_page
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts")
+
+    assert response.status_code == 200
+    call_args = mock_prompt_service.list_prompts.call_args
+    assert call_args.kwargs["page"] == 1
+    assert call_args.kwargs["page_size"] == 100
+    assert call_args.kwargs["sort"] == "created_at"
+    assert call_args.kwargs["workspace"] == "default"
+
+
+def test_list_prompts_with_sort(client, mock_prompt_service, sample_page):
+    mock_prompt_service.list_prompts.return_value = sample_page
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts?sort=-name")
+
+    assert response.status_code == 200
+    assert mock_prompt_service.list_prompts.call_args.kwargs["sort"] == "-name"
+
+
+def test_list_prompts_with_name_filter(client, mock_prompt_service, sample_page):
+    mock_prompt_service.list_prompts.return_value = sample_page
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts?filter[name][]=summarizer")
+
+    assert response.status_code == 200
+    assert mock_prompt_service.list_prompts.call_args.kwargs.get("filter_operation") is not None
+
+
+def test_list_prompts_response_structure(client, mock_prompt_service, sample_page):
+    mock_prompt_service.list_prompts.return_value = sample_page
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts")
+
+    assert response.status_code == 200
+    data = response.json()
+    assert "data" in data
+    assert "pagination" in data
+    assert len(data["data"]) == 1
+    assert data["data"][0]["name"] == "summarizer"
+
+
+def test_create_prompt_success(client, mock_prompt_service, sample_prompt):
+    mock_prompt_service.create_prompt.return_value = sample_prompt
+
+    request_body = {
+        "name": "summarizer",
+        "messages": [{"role": "user", "content": "Summarize: {{ document }}"}],
+        "input_variables": ["document"],
+    }
+
+    response = client.post("/apis/models/v2/workspaces/default/prompts", json=request_body)
+
+    assert response.status_code == 201
+    data = response.json()
+    assert data["name"] == "summarizer"
+    assert data["messages"][0]["role"] == "user"
+
+
+def test_create_prompt_with_tools(client, mock_prompt_service, sample_prompt):
+    mock_prompt_service.create_prompt.return_value = sample_prompt
+
+    request_body = {
+        "name": "weather-bot",
+        "messages": [{"role": "system", "content": "You can call tools."}],
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "description": "Get weather",
+                    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
+                },
+            }
+        ],
+        "tool_choice": "auto",
+    }
+
+    response = client.post("/apis/models/v2/workspaces/default/prompts", json=request_body)
+
+    assert response.status_code == 201
+    # The request validated and reached the service with parsed tools.
+    sent_request = mock_prompt_service.create_prompt.call_args[0][0]
+    assert sent_request.tools[0].function.name == "get_weather"
+    assert sent_request.tool_choice == "auto"
+
+
+def test_create_prompt_conflict_returns_409(client, mock_prompt_service):
+    mock_prompt_service.create_prompt.side_effect = ValueError(
+        "Prompt with name 'summarizer' already exists in workspace 'default'"
+    )
+
+    response = client.post(
+        "/apis/models/v2/workspaces/default/prompts",
+        json={"name": "summarizer"},
+    )
+
+    assert response.status_code == 409
+
+
+def test_create_prompt_entity_validation_error_returns_422(client, mock_prompt_service):
+    mock_prompt_service.create_prompt.side_effect = EntityValidationError("name must match pattern")
+
+    response = client.post(
+        "/apis/models/v2/workspaces/default/prompts",
+        json={"name": "summarizer"},
+    )
+
+    assert response.status_code == 422
+    assert "name must match pattern" in response.json()["detail"]
+
+
+def test_get_prompt_success(client, mock_prompt_service, sample_prompt):
+    mock_prompt_service.get_prompt.return_value = sample_prompt
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts/summarizer")
+
+    assert response.status_code == 200
+    assert response.json()["name"] == "summarizer"
+
+
+def test_get_prompt_not_found_returns_404(client, mock_prompt_service):
+    mock_prompt_service.get_prompt.return_value = None
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts/missing")
+
+    assert response.status_code == 404
+
+
+def test_update_prompt_success(client, mock_prompt_service, sample_prompt):
+    mock_prompt_service.update_prompt.return_value = sample_prompt
+
+    response = client.put(
+        "/apis/models/v2/workspaces/default/prompts/summarizer",
+        json={"description": "updated", "messages": [{"role": "user", "content": "hi"}]},
+    )
+
+    assert response.status_code == 200
+    assert response.json()["name"] == "summarizer"
+
+
+def test_update_prompt_not_found_returns_404(client, mock_prompt_service):
+    mock_prompt_service.update_prompt.return_value = None
+
+    response = client.put(
+        "/apis/models/v2/workspaces/default/prompts/missing",
+        json={"description": "updated"},
+    )
+
+    assert response.status_code == 404
+
+
+def test_delete_prompt_success(client, mock_prompt_service):
+    mock_prompt_service.delete_prompt.return_value = True
+
+    response = client.delete("/apis/models/v2/workspaces/default/prompts/summarizer")
+
+    assert response.status_code == 204
+
+
+def test_delete_prompt_not_found_returns_404(client, mock_prompt_service):
+    mock_prompt_service.delete_prompt.return_value = False
+
+    response = client.delete("/apis/models/v2/workspaces/default/prompts/missing")
+
+    assert response.status_code == 404
diff --git a/services/core/models/tests/unit/test_prompt_service_unit.py b/services/core/models/tests/unit/test_prompt_service_unit.py
new file mode 100644
index 0000000000..68531fe3b8
--- /dev/null
+++ b/services/core/models/tests/unit/test_prompt_service_unit.py
@@ -0,0 +1,231 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Unit tests for Prompt service with mocked EntityClient."""
+
+from datetime import datetime, timezone
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+from nmp.common.entities.client import EntityClient, EntityNotFoundError
+from nmp.core.models.api.service.prompt_service import PromptService
+from nmp.core.models.entities import Prompt as PromptEntity
+from nmp.core.models.schemas import (
+    ChatCompletionTool,
+    CreatePromptRequest,
+    DeletePromptRequest,
+    FunctionDefinition,
+    GetPromptRequest,
+    Prompt,
+    PromptMessage,
+    PromptMessageRole,
+    UpdatePromptRequest,
+)
+
+
+def create_prompt_entity(
+    entity_id: str = "prompt-id-123",
+    created_at: datetime | None = None,
+    updated_at: datetime | None = None,
+    **kwargs: Any,
+) -> PromptEntity:
+    """Helper to create a PromptEntity with the store-managed private attributes set."""
+    entity = PromptEntity(**kwargs)
+    entity._id = entity_id
+    entity._created_at = created_at or datetime.now(timezone.utc)
+    entity._updated_at = updated_at or datetime.now(timezone.utc)
+    return entity
+
+
+@pytest.fixture
+def mock_entity_client() -> AsyncMock:
+    """Create a mock EntityClient for testing."""
+    return AsyncMock(spec=EntityClient)
+
+
+@pytest.fixture
+def prompt_service(mock_entity_client):
+    """Create a PromptService with mocked EntityClient."""
+    return PromptService(mock_entity_client)
+
+
+@pytest.fixture
+def sample_messages() -> list[PromptMessage]:
+    return [
+        PromptMessage(role=PromptMessageRole.SYSTEM, content="You are a helpful {{ persona }}."),
+        PromptMessage(role=PromptMessageRole.USER, content="Summarize: {{ document }}"),
+    ]
+
+
+@pytest.fixture
+def sample_tools() -> list[ChatCompletionTool]:
+    return [
+        ChatCompletionTool(
+            function=FunctionDefinition(
+                name="get_weather",
+                description="Get the current weather for a city.",
+                parameters={"type": "object", "properties": {"city": {"type": "string"}}},
+            )
+        )
+    ]
+
+
+@pytest.fixture
+def sample_create_request(sample_messages, sample_tools) -> CreatePromptRequest:
+    return CreatePromptRequest(
+        name="summarizer",
+        project="test-project",
+        description="A summarization prompt",
+        messages=sample_messages,
+        input_variables=["persona", "document"],
+        tools=sample_tools,
+        tool_choice="auto",
+        tags=["nlp", "summarize"],
+    )
+
+
+@pytest.fixture
+def sample_prompt_entity(sample_messages, sample_tools) -> PromptEntity:
+    return create_prompt_entity(
+        name="summarizer",
+        workspace="default",
+        project="test-project",
+        description="A summarization prompt",
+        messages=sample_messages,
+        input_variables=["persona", "document"],
+        tools=sample_tools,
+        tool_choice="auto",
+        tags=["nlp", "summarize"],
+    )
+
+
+@pytest.mark.asyncio
+async def test_create_prompt_success(prompt_service, mock_entity_client, sample_create_request, sample_prompt_entity):
+    """Test successful prompt creation."""
+    mock_entity_client.get.side_effect = EntityNotFoundError("Entity not found")
+    mock_entity_client.create.return_value = sample_prompt_entity
+
+    result = await prompt_service.create_prompt(sample_create_request, "default")
+
+    assert isinstance(result, Prompt)
+    assert result.name == "summarizer"
+    assert result.workspace == "default"
+    assert result.input_variables == ["persona", "document"]
+    assert result.tools is not None
+    assert result.tools[0].function.name == "get_weather"
+    mock_entity_client.create.assert_called_once()
+    created_entity = mock_entity_client.create.call_args[0][0]
+    assert isinstance(created_entity, PromptEntity)
+    assert created_entity.name == "summarizer"
+    assert len(created_entity.messages) == 2
+
+
+@pytest.mark.asyncio
+async def test_create_prompt_conflict_raises_value_error(
+    prompt_service, mock_entity_client, sample_create_request, sample_prompt_entity
+):
+    """Test that an existing prompt causes a ValueError and no create call."""
+    mock_entity_client.get.return_value = sample_prompt_entity  # already exists
+
+    with pytest.raises(ValueError, match="already exists"):
+        await prompt_service.create_prompt(sample_create_request, "default")
+
+    mock_entity_client.create.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_get_prompt_found(prompt_service, mock_entity_client, sample_prompt_entity):
+    """Test retrieving an existing prompt."""
+    mock_entity_client.get.return_value = sample_prompt_entity
+
+    result = await prompt_service.get_prompt(GetPromptRequest(workspace="default", name="summarizer"))
+
+    assert result is not None
+    assert result.name == "summarizer"
+    assert result.tool_choice == "auto"
+
+
+@pytest.mark.asyncio
+async def test_get_prompt_not_found(prompt_service, mock_entity_client):
+    """Test that a missing prompt returns None."""
+    mock_entity_client.get.side_effect = EntityNotFoundError("not found")
+
+    result = await prompt_service.get_prompt(GetPromptRequest(workspace="default", name="missing"))
+
+    assert result is None
+
+
+@pytest.mark.asyncio
+async def test_list_prompts(prompt_service, mock_entity_client, sample_prompt_entity):
+    """Test listing prompts returns a Page with mapped schemas."""
+    mock_result = AsyncMock()
+    mock_result.data = [sample_prompt_entity]
+    mock_result.pagination = AsyncMock(page=1, page_size=100, total_pages=1, total_results=1)
+    mock_entity_client.list.return_value = mock_result
+
+    page = await prompt_service.list_prompts(workspace="default", page=1, page_size=100, sort="created_at")
+
+    assert page.pagination.total_results == 1
+    assert page.pagination.current_page_size == 1
+    assert len(page.data) == 1
+    assert page.data[0].name == "summarizer"
+
+
+@pytest.mark.asyncio
+async def test_update_prompt_success(prompt_service, mock_entity_client, sample_prompt_entity):
+    """Test updating an existing prompt replaces mutable fields."""
+    mock_entity_client.get.return_value = sample_prompt_entity
+    mock_entity_client.update.return_value = sample_prompt_entity
+
+    request = UpdatePromptRequest(
+        description="Updated description",
+        messages=[PromptMessage(role=PromptMessageRole.USER, content="New {{ x }}")],
+        input_variables=["x"],
+        tags=["updated"],
+    )
+
+    result = await prompt_service.update_prompt("default", "summarizer", request)
+
+    assert result is not None
+    mock_entity_client.update.assert_called_once()
+    updated_entity = mock_entity_client.update.call_args[0][0]
+    assert updated_entity.description == "Updated description"
+    assert updated_entity.input_variables == ["x"]
+    assert updated_entity.tags == ["updated"]
+    # Full replacement clears fields not present in the request
+    assert updated_entity.tools is None
+    assert updated_entity.tool_choice is None
+
+
+@pytest.mark.asyncio
+async def test_update_prompt_not_found(prompt_service, mock_entity_client):
+    """Test that updating a missing prompt returns None."""
+    mock_entity_client.get.side_effect = EntityNotFoundError("not found")
+
+    result = await prompt_service.update_prompt("default", "missing", UpdatePromptRequest())
+
+    assert result is None
+    mock_entity_client.update.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_delete_prompt_success(prompt_service, mock_entity_client, sample_prompt_entity):
+    """Test deleting an existing prompt returns True."""
+    mock_entity_client.get.return_value = sample_prompt_entity
+
+    result = await prompt_service.delete_prompt(DeletePromptRequest(workspace="default", name="summarizer"))
+
+    assert result is True
+    mock_entity_client.delete.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_delete_prompt_not_found(prompt_service, mock_entity_client):
+    """Test that deleting a missing prompt returns False and does not call delete."""
+    mock_entity_client.get.side_effect = EntityNotFoundError("not found")
+
+    result = await prompt_service.delete_prompt(DeletePromptRequest(workspace="default", name="missing"))
+
+    assert result is False
+    mock_entity_client.delete.assert_not_called()

From 92c7b1048ecc83fa97bad8f0df1710d4a8302aa1 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Tue, 9 Jun 2026 15:29:13 -0700
Subject: [PATCH 02/10] Potential fix for pull request finding 'CodeQL / Log
 Injection'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 services/core/models/src/nmp/core/models/api/v2/prompts.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py
index 84cbde3e73..0419c0b525 100644
--- a/services/core/models/src/nmp/core/models/api/v2/prompts.py
+++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py
@@ -60,7 +60,8 @@ async def list_prompts(
             filter_operation=parsed_filter.operation,
         )
     except Exception as e:
-        logger.exception(f"Failed to list prompts for workspace {workspace}")
+        safe_workspace = str(workspace).replace("\r", "").replace("\n", "")
+        logger.exception(f"Failed to list prompts for workspace {safe_workspace}")
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
 

From 0efe1d6ef7264f3734c3e9d30fb3cab6b05b2c06 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Tue, 9 Jun 2026 15:29:22 -0700
Subject: [PATCH 03/10] Potential fix for pull request finding 'CodeQL / Log
 Injection'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 .../models/src/nmp/core/models/api/v2/prompts.py     | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py
index 0419c0b525..70bd2433a9 100644
--- a/services/core/models/src/nmp/core/models/api/v2/prompts.py
+++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py
@@ -22,6 +22,12 @@
 
 logger = logging.getLogger(__name__)
 
+
+def _sanitize_for_log(value: object) -> str:
+    """Prevent log injection by removing line-break/control characters."""
+    return str(value).replace("\r", "").replace("\n", "")
+
+
 router = APIRouter()
 
 
@@ -77,7 +83,9 @@ async def create_prompt(
     service: PromptService = Depends(get_prompt_service),
 ) -> Prompt:
     """Create a new prompt."""
-    logger.info(f"Creating prompt: {workspace}/{request.name}")
+    safe_workspace = _sanitize_for_log(workspace)
+    safe_request_name = _sanitize_for_log(request.name)
+    logger.info(f"Creating prompt: {safe_workspace}/{safe_request_name}")
     try:
         return await service.create_prompt(request, workspace)
     except EntityValidationError as e:
@@ -85,7 +93,7 @@ async def create_prompt(
         raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e))
     except ValueError as e:
         if "already exists" in str(e).lower():
-            logger.warning(f"Prompt already exists: {workspace}/{request.name}")
+            logger.warning(f"Prompt already exists: {safe_workspace}/{safe_request_name}")
             raise HTTPException(
                 status_code=status.HTTP_409_CONFLICT,
                 detail=f"Prompt with workspace '{workspace}' and name '{request.name}' already exists",

From bf6ffb610b8dc9349162d081fe1feee89c2872b0 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Tue, 9 Jun 2026 15:37:21 -0700
Subject: [PATCH 04/10] fix(models): complete log-injection sanitization for
 prompt API

The CodeQL autofix only patched list_prompts and create_prompt, leaving
get_prompt, update_prompt, and delete_prompt still interpolating raw
user-controlled workspace/name into log messages (alerts 4168-4174).

Sanitize the remaining log calls via _sanitize_for_log, and switch
list_prompts to use the shared helper instead of an inlined replace for
consistency.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 .../src/nmp/core/models/api/v2/prompts.py       | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py
index 70bd2433a9..9f01a2d1d2 100644
--- a/services/core/models/src/nmp/core/models/api/v2/prompts.py
+++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py
@@ -66,8 +66,7 @@ async def list_prompts(
             filter_operation=parsed_filter.operation,
         )
     except Exception as e:
-        safe_workspace = str(workspace).replace("\r", "").replace("\n", "")
-        logger.exception(f"Failed to list prompts for workspace {safe_workspace}")
+        logger.exception(f"Failed to list prompts for workspace {_sanitize_for_log(workspace)}")
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
 
@@ -117,11 +116,11 @@ async def get_prompt(
     service: PromptService = Depends(get_prompt_service),
 ) -> Prompt:
     """Get a prompt by workspace and name."""
-    logger.debug(f"Getting prompt: {workspace}/{name}")
+    logger.debug(f"Getting prompt: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
     try:
         prompt = await service.get_prompt(GetPromptRequest(workspace=workspace, name=name))
         if not prompt:
-            logger.warning(f"Prompt not found: {workspace}/{name}")
+            logger.warning(f"Prompt not found: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
             raise HTTPException(
                 status_code=status.HTTP_404_NOT_FOUND,
                 detail=f"Prompt not found: {workspace}/{name}",
@@ -130,7 +129,7 @@ async def get_prompt(
     except HTTPException:
         raise
     except Exception as e:
-        logger.exception(f"Failed to get prompt {workspace}/{name}")
+        logger.exception(f"Failed to get prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
 
@@ -147,7 +146,7 @@ async def update_prompt(
     service: PromptService = Depends(get_prompt_service),
 ) -> Prompt:
     """Update an existing prompt (full replacement of mutable fields)."""
-    logger.debug(f"Updating prompt: {workspace}/{name}")
+    logger.debug(f"Updating prompt: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
     try:
         prompt = await service.update_prompt(workspace, name, request)
         if not prompt:
@@ -178,11 +177,11 @@ async def delete_prompt(
     service: PromptService = Depends(get_prompt_service),
 ):
     """Delete a prompt by workspace and name."""
-    logger.info(f"Deleting prompt: {workspace}/{name}")
+    logger.info(f"Deleting prompt: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
     try:
         deleted = await service.delete_prompt(DeletePromptRequest(workspace=workspace, name=name))
         if not deleted:
-            logger.warning(f"Prompt not found for deletion: {workspace}/{name}")
+            logger.warning(f"Prompt not found for deletion: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
             raise HTTPException(
                 status_code=status.HTTP_404_NOT_FOUND,
                 detail=f"Prompt not found: {workspace}/{name}",
@@ -191,5 +190,5 @@ async def delete_prompt(
     except HTTPException:
         raise
     except Exception as e:
-        logger.exception(f"Failed to delete prompt {workspace}/{name}")
+        logger.exception(f"Failed to delete prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))

From 85c27281f9d580dddc492b3e5a16784b9c37380f Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Tue, 9 Jun 2026 16:02:30 -0700
Subject: [PATCH 05/10] fix(models): address PR review findings in prompt API

- Prevent workspace filter from overriding the path-scoped workspace
  in list_prompts (cross-workspace read vector)
- Replace detail=str(e) with generic message in all 500 handlers to
  avoid leaking backend internals to clients
- Fix PUT full-replacement: tags was conditionally skipped; now always
  replaced (entity.tags = request.tags or [])
- Add bounds on list_prompts Query params: ge=1 on page; ge=1 and
  le=1000 on page_size
- Make ChatCompletionTool.type required (no default) so the generated
  OpenAPI schema marks it as required per OpenAI spec
- Fix create_prompt's fallback ValueError (400) handler to return
  "Invalid prompt data" instead of the raw exception string; the
  EntityValidationError (422) handlers still return the error text
- Add tests: workspace scope isolation, page/page_size bounds, tags
  cleared on omission

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 .../core/models/api/service/prompt_service.py |  3 +-
 .../src/nmp/core/models/api/v2/prompts.py     | 32 +++++++++----------
 .../models/src/nmp/core/models/schemas.py     |  1 -
 .../models/tests/unit/api/test_prompts_api.py | 19 +++++++++++
 .../tests/unit/test_prompt_service_unit.py    | 18 ++++++++++-
 5 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/services/core/models/src/nmp/core/models/api/service/prompt_service.py b/services/core/models/src/nmp/core/models/api/service/prompt_service.py
index 4754f91efb..9d068be2dd 100644
--- a/services/core/models/src/nmp/core/models/api/service/prompt_service.py
+++ b/services/core/models/src/nmp/core/models/api/service/prompt_service.py
@@ -150,8 +150,7 @@ async def update_prompt(self, workspace: str, name: str, request: UpdatePromptRe
         entity.tool_choice = request.tool_choice
         entity.response_format = request.response_format
         entity.inference_params = request.inference_params
-        if request.tags is not None:
-            entity.tags = request.tags
+        entity.tags = request.tags or []
 
         updated = await self.entity_client.update(entity)
         logger.info("Prompt updated", extra={"workspace": updated.workspace, "prompt_name": updated.name})
diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py
index 9f01a2d1d2..86dd87cdbc 100644
--- a/services/core/models/src/nmp/core/models/api/v2/prompts.py
+++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py
@@ -45,8 +45,8 @@ def _sanitize_for_log(value: object) -> str:
 )
 async def list_prompts(
     workspace: str,
-    page: int = Query(default=1, description="Page number."),
-    page_size: int = Query(default=100, description="Page size."),
+    page: int = Query(default=1, ge=1, description="Page number."),
+    page_size: int = Query(default=100, ge=1, le=1000, description="Page size."),
     sort: PromptSort = Query(
         default=PromptSort.CREATED_AT_ASC,
         description="The field to sort by. To sort in decreasing order, use `-` in front of the field name.",
@@ -55,19 +55,19 @@ async def list_prompts(
     service: PromptService = Depends(get_prompt_service),
 ) -> Page[Prompt]:
     """List prompts for a specific workspace."""
-    # Extract workspace — inject from path param if not in filter
-    filter_workspace = parsed_filter.remove("workspace") or workspace
+    # Discard any workspace override in the filter — always scope to the path workspace.
+    parsed_filter.remove("workspace")
     try:
         return await service.list_prompts(
-            workspace=filter_workspace,
+            workspace=workspace,
             page=page,
             page_size=page_size,
             sort=sort,
             filter_operation=parsed_filter.operation,
         )
-    except Exception as e:
+    except Exception:
         logger.exception(f"Failed to list prompts for workspace {_sanitize_for_log(workspace)}")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error")
 
 
 @router.post(
@@ -98,10 +98,10 @@ async def create_prompt(
                 detail=f"Prompt with workspace '{workspace}' and name '{request.name}' already exists",
             )
         logger.warning(f"Prompt creation validation error: {e}")
-        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
-    except Exception as e:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid prompt data")
+    except Exception:
         logger.exception("Failed to create prompt")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error")
 
 
 @router.get(
@@ -128,9 +128,9 @@ async def get_prompt(
         return prompt
     except HTTPException:
         raise
-    except Exception as e:
+    except Exception:
         logger.exception(f"Failed to get prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error")
 
 
 @router.put(
@@ -160,9 +160,9 @@ async def update_prompt(
         raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e))
     except HTTPException:
         raise
-    except Exception as e:
+    except Exception:
         logger.exception("Failed to update prompt")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error")
 
 
 @router.delete(
@@ -189,6 +189,6 @@ async def delete_prompt(
         return None
     except HTTPException:
         raise
-    except Exception as e:
+    except Exception:
         logger.exception(f"Failed to delete prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}")
-        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error")
diff --git a/services/core/models/src/nmp/core/models/schemas.py b/services/core/models/src/nmp/core/models/schemas.py
index 47e060ba49..546de3570b 100644
--- a/services/core/models/src/nmp/core/models/schemas.py
+++ b/services/core/models/src/nmp/core/models/schemas.py
@@ -757,7 +757,6 @@ class ChatCompletionTool(BaseModel):
     """An OpenAI-compatible tool definition (currently always a function tool)."""
 
     type: Literal["function"] = Field(
-        default="function",
         description="The type of the tool. Currently only 'function' is supported.",
     )
     function: FunctionDefinition = Field(description="The function definition for this tool.")
diff --git a/services/core/models/tests/unit/api/test_prompts_api.py b/services/core/models/tests/unit/api/test_prompts_api.py
index fd30eabba8..da8f30cd8a 100644
--- a/services/core/models/tests/unit/api/test_prompts_api.py
+++ b/services/core/models/tests/unit/api/test_prompts_api.py
@@ -105,6 +105,25 @@ def test_list_prompts_with_name_filter(client, mock_prompt_service, sample_page)
     assert mock_prompt_service.list_prompts.call_args.kwargs.get("filter_operation") is not None
 
 
+def test_list_prompts_workspace_filter_cannot_override_path(client, mock_prompt_service, sample_page):
+    mock_prompt_service.list_prompts.return_value = sample_page
+
+    response = client.get("/apis/models/v2/workspaces/default/prompts?filter[workspace][]=other")
+
+    assert response.status_code == 200
+    assert mock_prompt_service.list_prompts.call_args.kwargs["workspace"] == "default"
+
+
+def test_list_prompts_invalid_page_returns_422(client):
+    response = client.get("/apis/models/v2/workspaces/default/prompts?page=0")
+    assert response.status_code == 422
+
+
+def test_list_prompts_invalid_page_size_returns_422(client):
+    response = client.get("/apis/models/v2/workspaces/default/prompts?page_size=0")
+    assert response.status_code == 422
+
+
 def test_list_prompts_response_structure(client, mock_prompt_service, sample_page):
     mock_prompt_service.list_prompts.return_value = sample_page
 
diff --git a/services/core/models/tests/unit/test_prompt_service_unit.py b/services/core/models/tests/unit/test_prompt_service_unit.py
index 68531fe3b8..37b6415d31 100644
--- a/services/core/models/tests/unit/test_prompt_service_unit.py
+++ b/services/core/models/tests/unit/test_prompt_service_unit.py
@@ -62,11 +62,12 @@ def sample_messages() -> list[PromptMessage]:
 def sample_tools() -> list[ChatCompletionTool]:
     return [
         ChatCompletionTool(
+            type="function",
             function=FunctionDefinition(
                 name="get_weather",
                 description="Get the current weather for a city.",
                 parameters={"type": "object", "properties": {"city": {"type": "string"}}},
-            )
+            ),
         )
     ]
 
@@ -198,6 +199,21 @@ async def test_update_prompt_success(prompt_service, mock_entity_client, sample_
     assert updated_entity.tool_choice is None
 
 
+@pytest.mark.asyncio
+async def test_update_prompt_clears_tags_when_omitted(prompt_service, mock_entity_client, sample_prompt_entity):
+    """Test that omitting tags in an update replaces them with an empty list (full replacement)."""
+    sample_prompt_entity.tags = ["old-tag"]
+    mock_entity_client.get.return_value = sample_prompt_entity
+    mock_entity_client.update.return_value = sample_prompt_entity
+
+    request = UpdatePromptRequest(description="no tags")
+
+    await prompt_service.update_prompt("default", "summarizer", request)
+
+    updated_entity = mock_entity_client.update.call_args[0][0]
+    assert updated_entity.tags == []
+
+
 @pytest.mark.asyncio
 async def test_update_prompt_not_found(prompt_service, mock_entity_client):
     """Test that updating a missing prompt returns None."""

From e79c921a7fd5894c45770d1771e7024da96c59e3 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Tue, 9 Jun 2026 16:19:40 -0700
Subject: [PATCH 06/10] chore: run lint fix; sync OpenAPI specs and add prompt authz rules

Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 openapi/ga/individual/platform.openapi.yaml   |  5 ++-
 openapi/ga/openapi.yaml                       |  5 ++-
 openapi/openapi.yaml                          |  5 ++-
 sdk/stainless.yaml                            |  1 +
 .../nmp/core/auth/assets/static-authz.yaml    | 36 +++++++++++++++++++
 5 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/openapi/ga/individual/platform.openapi.yaml b/openapi/ga/individual/platform.openapi.yaml
index 96e7ade804..8475d741a0 100644
--- a/openapi/ga/individual/platform.openapi.yaml
+++ b/openapi/ga/individual/platform.openapi.yaml
@@ -6598,6 +6598,7 @@ paths:
         required: false
         schema:
           type: integer
+          minimum: 1
           description: Page number.
           default: 1
           title: Page
@@ -6607,6 +6608,8 @@ paths:
         required: false
         schema:
           type: integer
+          maximum: 1000
+          minimum: 1
           description: Page size.
           default: 100
           title: Page Size
@@ -8407,13 +8410,13 @@ components:
           const: function
           title: Type
           description: The type of the tool. Currently only 'function' is supported.
-          default: function
         function:
           allOf:
           - $ref: '#/components/schemas/FunctionDefinition'
           description: The function definition for this tool.
       type: object
       required:
+      - type
       - function
       title: ChatCompletionTool
       description: An OpenAI-compatible tool definition (currently always a function
diff --git a/openapi/ga/openapi.yaml b/openapi/ga/openapi.yaml
index 96e7ade804..8475d741a0 100644
--- a/openapi/ga/openapi.yaml
+++ b/openapi/ga/openapi.yaml
@@ -6598,6 +6598,7 @@ paths:
         required: false
         schema:
           type: integer
+          minimum: 1
           description: Page number.
           default: 1
           title: Page
@@ -6607,6 +6608,8 @@ paths:
         required: false
         schema:
           type: integer
+          maximum: 1000
+          minimum: 1
           description: Page size.
           default: 100
           title: Page Size
@@ -8407,13 +8410,13 @@ components:
           const: function
           title: Type
           description: The type of the tool. Currently only 'function' is supported.
-          default: function
         function:
           allOf:
           - $ref: '#/components/schemas/FunctionDefinition'
           description: The function definition for this tool.
       type: object
       required:
+      - type
       - function
       title: ChatCompletionTool
       description: An OpenAI-compatible tool definition (currently always a function
diff --git a/openapi/openapi.yaml b/openapi/openapi.yaml
index 96e7ade804..8475d741a0 100644
--- a/openapi/openapi.yaml
+++ b/openapi/openapi.yaml
@@ -6598,6 +6598,7 @@ paths:
         required: false
         schema:
           type: integer
+          minimum: 1
           description: Page number.
           default: 1
           title: Page
@@ -6607,6 +6608,8 @@ paths:
         required: false
         schema:
           type: integer
+          maximum: 1000
+          minimum: 1
           description: Page size.
           default: 100
           title: Page Size
@@ -8407,13 +8410,13 @@ components:
           const: function
           title: Type
           description: The type of the tool. Currently only 'function' is supported.
-          default: function
         function:
           allOf:
           - $ref: '#/components/schemas/FunctionDefinition'
           description: The function definition for this tool.
       type: object
       required:
+      - type
       - function
       title: ChatCompletionTool
       description: An OpenAI-compatible tool definition (currently always a function
diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml
index e000ff3ae4..c695227c70 100644
--- a/sdk/stainless.yaml
+++ b/sdk/stainless.yaml
@@ -721,6 +721,7 @@ resources:
       tool_calling_metadata_content: ToolCallingMetadataContent
       backend_format: BackendFormat
       finetuning_type: FinetuningType
+      inference_params: InferenceParams
   iam:
     standalone_api: true
     subresources:
diff --git a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml
index be49cf053d..13d6dd6da8 100644
--- a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml
+++ b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml
@@ -298,6 +298,7 @@ authz:
       - models.adapters.list
       - models.adapters.read
       - models.list
+      - models.prompts.read
       - models.read
       - projects.list
       - projects.read
@@ -349,6 +350,9 @@ authz:
       - models.adapters.update
       - models.create
       - models.delete
+      - models.prompts.create
+      - models.prompts.delete
+      - models.prompts.update
       - models.update
       - projects.create
       - projects.delete
@@ -1338,6 +1342,38 @@ authz:
         scopes:
         - models:write
         - platform:write
+    /apis/models/v2/workspaces/{workspace}/prompts:
+      get:
+        permissions:
+        - models.prompts.read
+        scopes:
+        - models:read
+        - platform:read
+      post:
+        permissions:
+        - models.prompts.create
+        scopes:
+        - models:write
+        - platform:write
+    /apis/models/v2/workspaces/{workspace}/prompts/{name}:
+      delete:
+        permissions:
+        - models.prompts.delete
+        scopes:
+        - models:write
+        - platform:write
+      get:
+        permissions:
+        - models.prompts.read
+        scopes:
+        - models:read
+        - platform:read
+      put:
+        permissions:
+        - models.prompts.update
+        scopes:
+        - models:write
+        - platform:write
     /apis/models/v2/workspaces/{workspace}/providers:
       get:
         permissions:

From e0e291b36abe10cf4f5fd1c87698a45d21a9aa70 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Thu, 11 Jun 2026 17:17:48 -0700
Subject: [PATCH 07/10] fix(sdk): regenerate Stainless SDK for the prompts resource

Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 .../nemo-platform/.nmpcontext/openapi.yaml    | 563 +++++++++++++
 .../nemo-platform/.nmpcontext/stainless.yaml  | 190 +++--
 sdk/python/nemo-platform/api.md               |   1 -
 .../src/nemo_platform/resources/files/api.md  |   2 +-
 .../nemo_platform/resources/files/filesets.py |  11 +-
 .../nemo_platform/resources/guardrail/api.md  |   5 +
 .../resources/inference/__init__.py           |  14 +
 .../nemo_platform/resources/inference/api.md  |  27 +
 .../resources/inference/inference.py          |  32 +
 .../resources/inference/prompts.py            | 743 ++++++++++++++++++
 .../src/nemo_platform/resources/jobs/api.md   |   5 +
 .../src/nemo_platform/resources/jobs/jobs.py  |   1 +
 .../src/nemo_platform/types/__init__.py       |   1 -
 .../src/nemo_platform/types/files/__init__.py |   2 +
 .../src/nemo_platform/types/files/fileset.py  |   2 +-
 .../types/files/fileset_create_params.py      |   4 +-
 .../{shared => files}/fileset_metadata.py     |   4 +-
 .../fileset_metadata_param.py}                |   8 +-
 .../types/files/fileset_update_params.py      |   4 +-
 .../nemo_platform/types/inference/__init__.py |  14 +
 .../types/inference/chat_completion_tool.py   |  37 +
 .../inference/chat_completion_tool_param.py   |  38 +
 .../types/inference/function_definition.py    |  45 ++
 .../inference/function_definition_param.py    |  46 ++
 .../nemo_platform/types/inference/prompt.py   |  96 +++
 .../types/inference/prompt_create_params.py   |  64 ++
 .../types/inference/prompt_filter_param.py    |  46 ++
 .../types/inference/prompt_list_params.py     |  47 ++
 .../types/inference/prompt_message.py         |  39 +
 .../types/inference/prompt_message_param.py   |  42 +
 .../types/inference/prompt_message_role.py    |  22 +
 .../types/inference/prompt_sort.py            |  22 +
 .../types/inference/prompt_update_params.py   |  57 ++
 .../types/inference/prompts_page.py           |  37 +
 .../nemo_platform/types/shared/__init__.py    |   1 -
 .../types/shared_params/__init__.py           |   1 -
 .../api_resources/inference/test_prompts.py   | 741 +++++++++++++++++
 sdk/stainless.yaml                            | 173 ++--
 38 files changed, 2993 insertions(+), 194 deletions(-)
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py
 rename sdk/python/nemo-platform/src/nemo_platform/types/{shared => files}/fileset_metadata.py (91%)
 rename sdk/python/nemo-platform/src/nemo_platform/types/{shared_params/fileset_metadata.py => files/fileset_metadata_param.py} (85%)
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py
 create mode 100644 sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py

diff --git a/sdk/python/nemo-platform/.nmpcontext/openapi.yaml b/sdk/python/nemo-platform/.nmpcontext/openapi.yaml
index dba0e62e51..9044b2eaef 100644
--- a/sdk/python/nemo-platform/.nmpcontext/openapi.yaml
+++ b/sdk/python/nemo-platform/.nmpcontext/openapi.yaml
@@ -6661,6 +6661,205 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts:
+    get:
+      tags:
+      - Prompts
+      summary: List Prompts By Workspace
+      description: List prompts for a specific workspace.
+      operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: page
+        in: query
+        required: false
+        schema:
+          type: integer
+          minimum: 1
+          description: Page number.
+          default: 1
+          title: Page
+        description: Page number.
+      - name: page_size
+        in: query
+        required: false
+        schema:
+          type: integer
+          maximum: 1000
+          minimum: 1
+          description: Page size.
+          default: 100
+          title: Page Size
+        description: Page size.
+      - name: sort
+        in: query
+        required: false
+        schema:
+          allOf:
+          - $ref: '#/components/schemas/PromptSort'
+          description: The field to sort by. To sort in decreasing order, use `-`
+            in front of the field name.
+          default: created_at
+        description: The field to sort by. To sort in decreasing order, use `-` in
+          front of the field name.
+      - in: query
+        name: filter
+        style: deepObject
+        required: false
+        explode: true
+        schema:
+          $ref: '#/components/schemas/PromptFilter'
+        description: Filter prompts by workspace, project, name, description, created_at,
+          and updated_at.
+      responses:
+        '200':
+          description: Return prompts for a workspace
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PromptsPage'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    post:
+      tags:
+      - Prompts
+      summary: Create Prompt
+      description: Create a new prompt.
+      operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreatePromptRequest'
+      responses:
+        '201':
+          description: Create a new prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+  /apis/models/v2/workspaces/{workspace}/prompts/{name}:
+    get:
+      tags:
+      - Prompts
+      summary: Get Prompt
+      description: Get a prompt by workspace and name.
+      operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '200':
+          description: Return prompt details
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    put:
+      tags:
+      - Prompts
+      summary: Update Prompt
+      description: Update an existing prompt (full replacement of mutable fields).
+      operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdatePromptRequest'
+      responses:
+        '200':
+          description: Update an existing prompt
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Prompt'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+    delete:
+      tags:
+      - Prompts
+      summary: Delete Prompt
+      description: Delete a prompt by workspace and name.
+      operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete
+      parameters:
+      - name: workspace
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Workspace
+      - name: name
+        in: path
+        required: true
+        schema:
+          type: string
+          title: Name
+      responses:
+        '204':
+          description: Delete a prompt
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
   /apis/models/v2/workspaces/{workspace}/providers:
     get:
       tags:
@@ -8294,6 +8493,24 @@ components:
       - role
       title: ChatCompletionSystemMessageParam
       description: System message parameter for chat completion.
+    ChatCompletionTool:
+      properties:
+        type:
+          type: string
+          const: function
+          title: Type
+          description: The type of the tool. Currently only 'function' is supported.
+        function:
+          allOf:
+          - $ref: '#/components/schemas/FunctionDefinition'
+          description: The function definition for this tool.
+      type: object
+      required:
+      - type
+      - function
+      title: ChatCompletionTool
+      description: An OpenAI-compatible tool definition (currently always a function
+        tool).
     ChatCompletionToolMessageParam:
       properties:
         content:
@@ -9083,6 +9300,65 @@ components:
       - source
       title: CreatePlatformJobRequest
       description: Request model for creating a new platform job.
+    CreatePromptRequest:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z),
+            digits (0-9), underscores, hyphens, and dots.'
+          examples:
+          - support-bot-system
+          - summarizer
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      required:
+      - name
+      title: CreatePromptRequest
+      description: Request model for creating a Prompt.
     CreateVirtualModelRequest:
       properties:
         default_model_entity:
@@ -10716,6 +10992,39 @@ components:
       - name
       title: FunctionCall
       description: Function call information.
+    FunctionDefinition:
+      properties:
+        name:
+          type: string
+          maxLength: 255
+          title: Name
+          description: The name of the function to be called.
+        description:
+          title: Description
+          description: A description of what the function does, used by the model
+            to decide when and how to call it.
+          type: string
+        parameters:
+          title: Parameters
+          description: The parameters the function accepts, described as a JSON Schema
+            object.
+          additionalProperties: true
+          type: object
+        strict:
+          title: Strict
+          description: Whether to enforce strict schema adherence when generating
+            the function call.
+          type: boolean
+      type: object
+      required:
+      - name
+      title: FunctionDefinition
+      description: 'An OpenAI-compatible function definition for tool calling.
+
+
+        Mirrors the ``function`` object the Inference Gateway forwards to
+
+        OpenAI-compatible backends.'
     GLiNERDetection:
       properties:
         server_endpoint:
@@ -15269,6 +15578,110 @@ components:
       required:
       - data
       title: ProjectsPage
+    Prompt:
+      properties:
+        id:
+          type: string
+          title: Id
+          description: Unique identifier for the prompt.
+        name:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Name
+          description: 'Name of the entity. Name/workspace combo must be unique across
+            all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores,
+            hyphens, and dots.'
+          examples:
+          - llama-3.1-8b
+          - my-custom-model
+        workspace:
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-.]+$
+          title: Workspace
+          description: 'The workspace of the entity. Allowed characters: letters (a-z,
+            A-Z), digits (0-9), underscores, hyphens, and dots.'
+        project:
+          title: Project
+          description: The URN of the project associated with this entity.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        created_at:
+          type: string
+          format: date-time
+          title: Created At
+          description: The timestamp of prompt creation
+        updated_at:
+          type: string
+          format: date-time
+          title: Updated At
+          description: The timestamp of the last prompt update
+        description:
+          title: Description
+          description: Optional description of the prompt.
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+          description: Ordered list of chat messages that make up the prompt.
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+          description: Names of the Jinja2 template variables the prompt expects.
+        tools:
+          title: Tools
+          description: Optional OpenAI-compatible tool definitions to send with the
+            prompt.
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+          description: 'Controls which (if any) tool is called: ''none'', ''auto'',
+            ''required'', or a named-tool object.'
+        response_format:
+          title: Response Format
+          description: Optional OpenAI-compatible response_format, e.g. a json_schema
+            structured-output spec.
+          additionalProperties: true
+          type: object
+        inference_params:
+          allOf:
+          - $ref: '#/components/schemas/InferenceParams'
+          description: Optional default model and sampling parameters (temperature,
+            top_p, max_tokens, ...).
+        tags:
+          items:
+            type: string
+          type: array
+          title: Tags
+          description: Optional free-form tags for organizing prompts.
+      type: object
+      required:
+      - name
+      - workspace
+      - created_at
+      - updated_at
+      title: Prompt
+      description: 'A reusable, stored chat prompt.
+
+
+        A Prompt captures the messages, declared template variables, optional tool
+
+        definitions, and default inference parameters needed to invoke a model
+
+        through the Inference Gateway. The unique identifier is workspace/name.'
     PromptData:
       properties:
         system_prompt:
@@ -15291,6 +15704,106 @@ components:
       type: object
       title: PromptData
       description: Configuration for prompt engineering.
+    PromptFilter:
+      additionalProperties: false
+      description: Filter for Prompt queries.
+      properties:
+        workspace:
+          description: Filter by workspace.
+          title: Workspace
+          type: string
+        project:
+          description: Filter by project URN.
+          title: Project
+          type: string
+        name:
+          description: Filter by name.
+          title: Name
+          type: string
+        description:
+          description: Filter by description.
+          title: Description
+          type: string
+        created_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by creation date.
+        updated_at:
+          allOf:
+          - $ref: '#/components/schemas/DatetimeFilter'
+          description: Filter by update date.
+      title: PromptFilter
+      type: object
+    PromptMessage:
+      properties:
+        role:
+          allOf:
+          - $ref: '#/components/schemas/PromptMessageRole'
+          description: The role of the message author.
+        content:
+          type: string
+          title: Content
+          description: Templated message content. May contain template variables.
+      type: object
+      required:
+      - role
+      - content
+      title: PromptMessage
+      description: 'A single templated message in a chat prompt.
+
+
+        ``content`` is a Jinja2 template body that may reference the prompt''s
+
+        declared ``input_variables`` (e.g. ``{{ topic }}``).'
+    PromptMessageRole:
+      type: string
+      enum:
+      - system
+      - developer
+      - user
+      - assistant
+      title: PromptMessageRole
+      description: 'Role of a message author in a chat prompt.
+
+
+        Follows the OpenAI chat schema the Inference Gateway speaks
+
+        (``/v1/chat/completions``).'
+    PromptSort:
+      type: string
+      enum:
+      - name
+      - -name
+      - created_at
+      - -created_at
+      - updated_at
+      - -updated_at
+      title: PromptSort
+      description: Sort fields for Prompt queries.
+    PromptsPage:
+      properties:
+        data:
+          items:
+            $ref: '#/components/schemas/Prompt'
+          type: array
+          title: Data
+        pagination:
+          allOf:
+          - $ref: '#/components/schemas/PaginationData'
+          description: Pagination information.
+        sort:
+          title: Sort
+          description: The field on which the results are sorted.
+          type: string
+        filter:
+          title: Filter
+          description: Filtering information.
+          additionalProperties: true
+          type: object
+      type: object
+      required:
+      - data
+      title: PromptsPage
     RailStatus:
       properties:
         status:
@@ -17159,6 +17672,56 @@ components:
 
 
         This endpoint supports partial updates for fields managed by Models Controller.'
+    UpdatePromptRequest:
+      properties:
+        project:
+          title: Project
+          description: The URN of the project associated with this prompt.
+          type: string
+          maxLength: 255
+          pattern: ^[\w\-./]+$
+        description:
+          title: Description
+          type: string
+          maxLength: 1000
+        messages:
+          items:
+            $ref: '#/components/schemas/PromptMessage'
+          type: array
+          title: Messages
+        input_variables:
+          items:
+            type: string
+          type: array
+          title: Input Variables
+        tools:
+          title: Tools
+          items:
+            $ref: '#/components/schemas/ChatCompletionTool'
+          type: array
+        tool_choice:
+          anyOf:
+          - type: string
+          - additionalProperties: true
+            type: object
+          title: Tool Choice
+        response_format:
+          title: Response Format
+          additionalProperties: true
+          type: object
+        inference_params:
+          $ref: '#/components/schemas/InferenceParams'
+        tags:
+          title: Tags
+          items:
+            type: string
+          type: array
+      type: object
+      title: UpdatePromptRequest
+      description: 'Request model for replacing a Prompt''s mutable fields (full update).
+
+
+        The prompt name and workspace come from the URL path and cannot be changed.'
     UpdateVirtualModelRequest:
       properties:
         default_model_entity:
diff --git a/sdk/python/nemo-platform/.nmpcontext/stainless.yaml b/sdk/python/nemo-platform/.nmpcontext/stainless.yaml
index 7d5a9da0e6..e96b34b9e4 100644
--- a/sdk/python/nemo-platform/.nmpcontext/stainless.yaml
+++ b/sdk/python/nemo-platform/.nmpcontext/stainless.yaml
@@ -26,16 +26,16 @@ custom_casings:
     initialism: true
 
 targets:
-#  typescript:
-#    _skip_running_tests: false
-#    package_name: nemo-platform-v1
-#    publish:
-#      npm: false
-#    skip: false
-#    options:
-#      mcp_server:
-#        package_name: nemo-platform-v1-mcp
-#        enable_all_resources: true
+  #  typescript:
+  #    _skip_running_tests: false
+  #    package_name: nemo-platform-v1
+  #    publish:
+  #      npm: false
+  #    skip: false
+  #    options:
+  #      mcp_server:
+  #        package_name: nemo-platform-v1-mcp
+  #        enable_all_resources: true
 
   python:
     # [docs]: https://www.stainless.com/docs/reference/editions
@@ -81,86 +81,86 @@ client_settings:
 # `pagination` defines [pagination schemes] which provides a template to match
 # endpoints and generate next-page and auto-pagination helpers in the SDKs.
 pagination:
-- name: default_pagination
-  type: page_number
-  request:
-    page:
-      type: integer
-      x-stainless-pagination-property:
-        purpose: page_number_param
-    page_size:
-      type: integer
-  response:
-    data:
-      type: array
-      x-stainless-pagination-property:
-        purpose: items
-      items:
+  - name: default_pagination
+    type: page_number
+    request:
+      page:
+        type: integer
+        x-stainless-pagination-property:
+          purpose: page_number_param
+      page_size:
+        type: integer
+    response:
+      data:
+        type: array
+        x-stainless-pagination-property:
+          purpose: items
+        items:
+          type: object
+          additionalProperties: true
+      pagination:
         type: object
-        additionalProperties: true
-    pagination:
-      type: object
-      properties:
-        page:
-          type: integer
-          title: Page
-          description: The current page number.
-          x-stainless-pagination-property:
-            purpose: current_page_number_field
-        page_size:
-          type: integer
-          title: Page Size
-          description: The page size used for the query.
-        current_page_size:
-          type: integer
-          title: Current Page Size
-          description: The size for the current page.
-        total_pages:
-          type: integer
-          title: Total Pages
-          description: The total number of pages.
-          x-stainless-pagination-property:
-            purpose: total_page_count_field
-        total_results:
-          type: integer
-          title: Total Results
-          description: The total number of results.
-      required:
-      - page
-      - page_size
-      - total_pages
-      - total_results
-      - current_page_size
-- name: logs_pagination
-  type: cursor
-  request:
-    limit:
-      type: integer
-    page_cursor:
-      type: string
-      x-stainless-pagination-property:
-        purpose: next_cursor_param
-  response:
-    data:
-      type: array
-      x-stainless-pagination-property:
-        purpose: items
-      items:
-        type: object
-        additionalProperties: true
-    next_page:
-      type: string
-      x-stainless-pagination-property:
-        purpose: next_cursor_field
+        properties:
+          page:
+            type: integer
+            title: Page
+            description: The current page number.
+            x-stainless-pagination-property:
+              purpose: current_page_number_field
+          page_size:
+            type: integer
+            title: Page Size
+            description: The page size used for the query.
+          current_page_size:
+            type: integer
+            title: Current Page Size
+            description: The size for the current page.
+          total_pages:
+            type: integer
+            title: Total Pages
+            description: The total number of pages.
+            x-stainless-pagination-property:
+              purpose: total_page_count_field
+          total_results:
+            type: integer
+            title: Total Results
+            description: The total number of results.
+        required:
+          - page
+          - page_size
+          - total_pages
+          - total_results
+          - current_page_size
+  - name: logs_pagination
+    type: cursor
+    request:
+      limit:
+        type: integer
+      page_cursor:
+        type: string
+        x-stainless-pagination-property:
+          purpose: next_cursor_param
+    response:
+      data:
+        type: array
+        x-stainless-pagination-property:
+          purpose: items
+        items:
+          type: object
+          additionalProperties: true
+      next_page:
+        type: string
+        x-stainless-pagination-property:
+          purpose: next_cursor_field
 
 streaming:
   on_event:
-  - data_starts_with: "[DONE]"
-    handle: done
-  - event_type: error
-    handle: error
-  - event_type:
-    handle: yield
+    - data_starts_with: "[DONE]"
+      handle: done
+    - event_type: error
+      handle: error
+    - event_type:
+      handle: yield
 
 readme:
   example_requests:
@@ -464,6 +464,24 @@ resources:
           update: put /apis/models/v2/workspaces/{workspace}/providers/{name}
           delete: delete /apis/models/v2/workspaces/{workspace}/providers/{name}
           update_status: put /apis/models/v2/workspaces/{workspace}/providers/{name}/status
+      prompts:
+        models:
+          chat_completion_tool: ChatCompletionTool
+          create_prompt_request: CreatePromptRequest
+          function_definition: FunctionDefinition
+          prompt: Prompt
+          prompt_filter: PromptFilter
+          prompt_message: PromptMessage
+          prompt_message_role: PromptMessageRole
+          prompt_sort: PromptSort
+          prompts_page: PromptsPage
+          update_prompt_request: UpdatePromptRequest
+        methods:
+          list: get /apis/models/v2/workspaces/{workspace}/prompts
+          create: post /apis/models/v2/workspaces/{workspace}/prompts
+          retrieve: get /apis/models/v2/workspaces/{workspace}/prompts/{name}
+          update: put /apis/models/v2/workspaces/{workspace}/prompts/{name}
+          delete: delete /apis/models/v2/workspaces/{workspace}/prompts/{name}
       gateway:
         subresources:
           openai:
diff --git a/sdk/python/nemo-platform/api.md b/sdk/python/nemo-platform/api.md
index 271d51d2df..a0e07c72cd 100644
--- a/sdk/python/nemo-platform/api.md
+++ b/sdk/python/nemo-platform/api.md
@@ -10,7 +10,6 @@ from nemo_platform.types import (
     DatetimeFilter,
     DeleteResponse,
     FileStorageType,
-    FilesetMetadata,
     FinetuningType,
     GenericSortField,
     HTTPValidationError,
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md
index 72e7b5ca66..882f649add 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md
@@ -33,7 +33,7 @@ Methods:
 Types:
 
 ```python
-from nemo_platform.types.files import FilesetFilter
+from nemo_platform.types.files import FilesetFilter, FilesetMetadata, FilesetMetadataParam
 ```
 
 Methods:
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py b/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py
index f8fb167cf2..018acd45b4 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py
@@ -34,6 +34,7 @@
 from ...pagination import SyncDefaultPagination, AsyncDefaultPagination
 from ...types.files import (
     FilesetPurpose,
+    FilesetMetadataParam,
     fileset_list_params,
     fileset_create_params,
     fileset_update_params,
@@ -43,7 +44,7 @@
 from ...types.files.fileset_purpose import FilesetPurpose
 from ...types.shared.generic_sort_field import GenericSortField
 from ...types.files.fileset_filter_param import FilesetFilterParam
-from ...types.shared_params.fileset_metadata import FilesetMetadata
+from ...types.files.fileset_metadata_param import FilesetMetadataParam
 from ..._exceptions import ConflictError
 
 __all__ = ["FilesetsResource", "AsyncFilesetsResource"]
@@ -77,7 +78,7 @@ def create(
         cache: bool | Omit = omit,
         custom_fields: Dict[str, object] | Omit = omit,
         description: str | Omit = omit,
-        metadata: FilesetMetadata | Omit = omit,
+        metadata: FilesetMetadataParam | Omit = omit,
         project: str | Omit = omit,
         purpose: FilesetPurpose | Omit = omit,
         storage: fileset_create_params.Storage | Omit = omit,
@@ -206,7 +207,7 @@ def update(
         workspace: str | None = None,
         custom_fields: Dict[str, object] | Omit = omit,
         description: str | Omit = omit,
-        metadata: FilesetMetadata | Omit = omit,
+        metadata: FilesetMetadataParam | Omit = omit,
         project: str | Omit = omit,
         purpose: FilesetPurpose | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -402,7 +403,7 @@ async def create(
         cache: bool | Omit = omit,
         custom_fields: Dict[str, object] | Omit = omit,
         description: str | Omit = omit,
-        metadata: FilesetMetadata | Omit = omit,
+        metadata: FilesetMetadataParam | Omit = omit,
         project: str | Omit = omit,
         purpose: FilesetPurpose | Omit = omit,
         storage: fileset_create_params.Storage | Omit = omit,
@@ -531,7 +532,7 @@ async def update(
         workspace: str | None = None,
         custom_fields: Dict[str, object] | Omit = omit,
         description: str | Omit = omit,
-        metadata: FilesetMetadata | Omit = omit,
+        metadata: FilesetMetadataParam | Omit = omit,
         project: str | Omit = omit,
         purpose: FilesetPurpose | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md
index 52c2cf31fd..4fe014901d 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md
@@ -60,14 +60,19 @@ from nemo_platform.types.guardrail import (
     PangeaRailOptions,
     PatronusEvaluateAPIParams,
     PatronusEvaluateConfig,
+    PatronusEvaluateConfigParam,
     PatronusEvaluationSuccessStrategy,
     PatronusRailConfig,
+    PatronusRailConfigParam,
     PrivateAIDetection,
     PrivateAIDetectionOptions,
     RailStatus,
     Rails,
     RailsConfig,
     RailsConfigData,
+    RailsConfigDataParam,
+    RailsConfigParam,
+    RailsParam,
     ReasoningConfig,
     RegexDetection,
     RegexDetectionOptions,
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py
index 83bee2909a..59c3f75914 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py
@@ -31,6 +31,14 @@
     GatewayResourceWithStreamingResponse,
     AsyncGatewayResourceWithStreamingResponse,
 )
+from .prompts import (
+    PromptsResource,
+    AsyncPromptsResource,
+    PromptsResourceWithRawResponse,
+    AsyncPromptsResourceWithRawResponse,
+    PromptsResourceWithStreamingResponse,
+    AsyncPromptsResourceWithStreamingResponse,
+)
 from .inference import (
     InferenceResource,
     AsyncInferenceResource,
@@ -103,6 +111,12 @@
     "AsyncProvidersResourceWithRawResponse",
     "ProvidersResourceWithStreamingResponse",
     "AsyncProvidersResourceWithStreamingResponse",
+    "PromptsResource",
+    "AsyncPromptsResource",
+    "PromptsResourceWithRawResponse",
+    "AsyncPromptsResourceWithRawResponse",
+    "PromptsResourceWithStreamingResponse",
+    "AsyncPromptsResourceWithStreamingResponse",
     "GatewayResource",
     "AsyncGatewayResource",
     "GatewayResourceWithRawResponse",
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md
index b8a3fa9f6d..dca686fe43 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md
@@ -146,6 +146,33 @@ Methods:
 - <code title="delete /apis/models/v2/workspaces/{workspace}/providers/{name}">client.inference.providers.<a href="./src/nemo_platform/resources/inference/providers.py">delete</a>(name, \*, workspace) -> None</code>
 - <code title="put /apis/models/v2/workspaces/{workspace}/providers/{name}/status">client.inference.providers.<a href="./src/nemo_platform/resources/inference/providers.py">update_status</a>(name, \*, workspace, \*\*<a href="src/nemo_platform/types/inference/provider_update_status_params.py">params</a>) -> <a href="./src/nemo_platform/types/inference/model_provider.py">ModelProvider</a></code>
 
+## Prompts
+
+Types:
+
+```python
+from nemo_platform.types.inference import (
+    ChatCompletionTool,
+    CreatePromptRequest,
+    FunctionDefinition,
+    Prompt,
+    PromptFilter,
+    PromptMessage,
+    PromptMessageRole,
+    PromptSort,
+    PromptsPage,
+    UpdatePromptRequest,
+)
+```
+
+Methods:
+
+- <code title="post /apis/models/v2/workspaces/{workspace}/prompts">client.inference.prompts.<a href="./src/nemo_platform/resources/inference/prompts.py">create</a>(\*, workspace, \*\*<a href="src/nemo_platform/types/inference/prompt_create_params.py">params</a>) -> <a href="./src/nemo_platform/types/inference/prompt.py">Prompt</a></code>
+- <code title="get /apis/models/v2/workspaces/{workspace}/prompts/{name}">client.inference.prompts.<a href="./src/nemo_platform/resources/inference/prompts.py">retrieve</a>(name, \*, workspace) -> <a href="./src/nemo_platform/types/inference/prompt.py">Prompt</a></code>
+- <code title="put /apis/models/v2/workspaces/{workspace}/prompts/{name}">client.inference.prompts.<a href="./src/nemo_platform/resources/inference/prompts.py">update</a>(name, \*, workspace, \*\*<a href="src/nemo_platform/types/inference/prompt_update_params.py">params</a>) -> <a href="./src/nemo_platform/types/inference/prompt.py">Prompt</a></code>
+- <code title="get /apis/models/v2/workspaces/{workspace}/prompts">client.inference.prompts.<a href="./src/nemo_platform/resources/inference/prompts.py">list</a>(\*, workspace, \*\*<a href="src/nemo_platform/types/inference/prompt_list_params.py">params</a>) -> <a href="./src/nemo_platform/types/inference/prompt.py">SyncDefaultPagination[Prompt]</a></code>
+- <code title="delete /apis/models/v2/workspaces/{workspace}/prompts/{name}">client.inference.prompts.<a href="./src/nemo_platform/resources/inference/prompts.py">delete</a>(name, \*, workspace) -> None</code>
+
 ## Gateway
 
 ### OpenAI
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py
index f9d80a024e..5abbe23ce3 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py
@@ -25,6 +25,14 @@
     ModelsResourceWithStreamingResponse,
     AsyncModelsResourceWithStreamingResponse,
 )
+from .prompts import (
+    PromptsResource,
+    AsyncPromptsResource,
+    PromptsResourceWithRawResponse,
+    AsyncPromptsResourceWithRawResponse,
+    PromptsResourceWithStreamingResponse,
+    AsyncPromptsResourceWithStreamingResponse,
+)
 from ..._compat import cached_property
 from .providers import (
     ProvidersResource,
@@ -92,6 +100,10 @@ def deployments(self) -> DeploymentsResource:
     def providers(self) -> ProvidersResource:
         return ProvidersResource(self._client)
 
+    @cached_property
+    def prompts(self) -> PromptsResource:
+        return PromptsResource(self._client)
+
     @cached_property
     def gateway(self) -> GatewayResource:
         return GatewayResource(self._client)
@@ -137,6 +149,10 @@ def deployments(self) -> AsyncDeploymentsResource:
     def providers(self) -> AsyncProvidersResource:
         return AsyncProvidersResource(self._client)
 
+    @cached_property
+    def prompts(self) -> AsyncPromptsResource:
+        return AsyncPromptsResource(self._client)
+
     @cached_property
     def gateway(self) -> AsyncGatewayResource:
         return AsyncGatewayResource(self._client)
@@ -185,6 +201,10 @@ def deployments(self) -> DeploymentsResourceWithRawResponse:
     def providers(self) -> ProvidersResourceWithRawResponse:
         return ProvidersResourceWithRawResponse(self._inference.providers)
 
+    @cached_property
+    def prompts(self) -> PromptsResourceWithRawResponse:
+        return PromptsResourceWithRawResponse(self._inference.prompts)
+
     @cached_property
     def gateway(self) -> GatewayResourceWithRawResponse:
         return GatewayResourceWithRawResponse(self._inference.gateway)
@@ -214,6 +234,10 @@ def deployments(self) -> AsyncDeploymentsResourceWithRawResponse:
     def providers(self) -> AsyncProvidersResourceWithRawResponse:
         return AsyncProvidersResourceWithRawResponse(self._inference.providers)
 
+    @cached_property
+    def prompts(self) -> AsyncPromptsResourceWithRawResponse:
+        return AsyncPromptsResourceWithRawResponse(self._inference.prompts)
+
     @cached_property
     def gateway(self) -> AsyncGatewayResourceWithRawResponse:
         return AsyncGatewayResourceWithRawResponse(self._inference.gateway)
@@ -243,6 +267,10 @@ def deployments(self) -> DeploymentsResourceWithStreamingResponse:
     def providers(self) -> ProvidersResourceWithStreamingResponse:
         return ProvidersResourceWithStreamingResponse(self._inference.providers)
 
+    @cached_property
+    def prompts(self) -> PromptsResourceWithStreamingResponse:
+        return PromptsResourceWithStreamingResponse(self._inference.prompts)
+
     @cached_property
     def gateway(self) -> GatewayResourceWithStreamingResponse:
         return GatewayResourceWithStreamingResponse(self._inference.gateway)
@@ -272,6 +300,10 @@ def deployments(self) -> AsyncDeploymentsResourceWithStreamingResponse:
     def providers(self) -> AsyncProvidersResourceWithStreamingResponse:
         return AsyncProvidersResourceWithStreamingResponse(self._inference.providers)
 
+    @cached_property
+    def prompts(self) -> AsyncPromptsResourceWithStreamingResponse:
+        return AsyncPromptsResourceWithStreamingResponse(self._inference.prompts)
+
     @cached_property
     def gateway(self) -> AsyncGatewayResourceWithStreamingResponse:
         return AsyncGatewayResourceWithStreamingResponse(self._inference.gateway)
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py
new file mode 100644
index 0000000000..d27e1de6dc
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py
@@ -0,0 +1,743 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import path_template, maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from ...pagination import SyncDefaultPagination, AsyncDefaultPagination
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.inference import PromptSort, prompt_list_params, prompt_create_params, prompt_update_params
+from ...types.inference.prompt import Prompt
+from ...types.inference.prompt_sort import PromptSort
+from ...types.inference.prompt_filter_param import PromptFilterParam
+from ...types.inference.prompt_message_param import PromptMessageParam
+from ...types.shared_params.inference_params import InferenceParams
+from ...types.inference.chat_completion_tool_param import ChatCompletionToolParam
+from ..._exceptions import ConflictError
+
+__all__ = ["PromptsResource", "AsyncPromptsResource"]
+
+
+class PromptsResource(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> PromptsResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        Built as `PromptsResourceWithRawResponse(self)`. For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#accessing-raw-response-data-e-g-headers
+        """
+        return PromptsResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> PromptsResourceWithStreamingResponse:
+        """An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        Built as `PromptsResourceWithStreamingResponse(self)`. For more information, see
+        https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#with_streaming_response
+        """
+        return PromptsResourceWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        workspace: str | None = None,
+        name: str,
+        description: str | Omit = omit,
+        inference_params: InferenceParams | Omit = omit,
+        input_variables: SequenceNotStr[str] | Omit = omit,
+        messages: Iterable[PromptMessageParam] | Omit = omit,
+        project: str | Omit = omit,
+        response_format: Dict[str, object] | Omit = omit,
+        tags: SequenceNotStr[str] | Omit = omit,
+        tool_choice: Union[str, Dict[str, object]] | Omit = omit,
+        tools: Iterable[ChatCompletionToolParam] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        exist_ok: bool = False,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Prompt:
+        """Create a new prompt.
+
+        Args:
+          workspace: Workspace to create the prompt in. When `None`, it is taken from the client's
+              `_get_workspace_path_param()`; an empty value raises `ValueError`.
+
+          name: Name of the prompt. Allowed characters: letters (a-z, A-Z), digits (0-9),
+              underscores, hyphens, and dots.
+
+          inference_params: Parameters for model inference. Extra fields can be supplied for additional
+              options applied to the inference request directly. Fields not supported by the
+              model may cause inference errors during evaluation.
+
+          project: The URN of the project associated with this prompt.
+
+          exist_ok: Do not raise an error if the resource already exists. Returns the existing resource,
+              fetched via `retrieve` with the same `extra_headers`, `extra_query` and `timeout`.
+
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        try:
+            if workspace is None:
+                workspace = self._client._get_workspace_path_param()
+            if not workspace:
+                raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+            return self._post(
+                path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace),
+                body=maybe_transform(
+                    {
+                        "name": name,
+                        "description": description,
+                        "inference_params": inference_params,
+                        "input_variables": input_variables,
+                        "messages": messages,
+                        "project": project,
+                        "response_format": response_format,
+                        "tags": tags,
+                        "tool_choice": tool_choice,
+                        "tools": tools,
+                    },
+                    prompt_create_params.PromptCreateParams,
+                ),
+                options=make_request_options(
+                    extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+                ),
+                cast_to=Prompt,
+            )
+        except ConflictError:
+            if not exist_ok:
+                raise
+            # extra_body is not forwarded: it carries JSON properties for the POST body, not for the GET.
+            return self.retrieve(
+                name, workspace=workspace, extra_headers=extra_headers, extra_query=extra_query, timeout=timeout
+            )
+
+    def retrieve(
+        self,
+        name: str,
+        *,
+        workspace: str | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Prompt:
+        """
+        Get a prompt by workspace and name.
+
+        Args:
+          name: Name of the prompt to fetch. An empty value raises `ValueError`.
+          workspace: Workspace holding the prompt. When `None`, it is taken from the client's
+              `_get_workspace_path_param()`; an empty value raises `ValueError`.
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        if not name:
+            raise ValueError(f"Expected a non-empty value for `name` but received {name!r}")
+        return self._get(
+            path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Prompt,
+        )
+
+    def update(
+        self,
+        name: str,
+        *,
+        workspace: str | None = None,
+        description: str | Omit = omit,
+        inference_params: InferenceParams | Omit = omit,
+        input_variables: SequenceNotStr[str] | Omit = omit,
+        messages: Iterable[PromptMessageParam] | Omit = omit,
+        project: str | Omit = omit,
+        response_format: Dict[str, object] | Omit = omit,
+        tags: SequenceNotStr[str] | Omit = omit,
+        tool_choice: Union[str, Dict[str, object]] | Omit = omit,
+        tools: Iterable[ChatCompletionToolParam] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Prompt:
+        """
+        Update an existing prompt (full replacement of mutable fields).
+
+        Args:
+          name: Name of the prompt to update. An empty value raises `ValueError`.
+          workspace: Workspace holding the prompt. When `None`, it is taken from the client's
+              `_get_workspace_path_param()`; an empty value raises `ValueError`.
+
+          inference_params: Parameters for model inference. Extra fields can be supplied for additional
+              options applied to the inference request directly. Fields not supported by the
+              model may cause inference errors during evaluation.
+          project: The URN of the project associated with this prompt.
+
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        if not name:
+            raise ValueError(f"Expected a non-empty value for `name` but received {name!r}")
+        return self._put(
+            path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name),
+            body=maybe_transform(
+                {
+                    "description": description,
+                    "inference_params": inference_params,
+                    "input_variables": input_variables,
+                    "messages": messages,
+                    "project": project,
+                    "response_format": response_format,
+                    "tags": tags,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                prompt_update_params.PromptUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Prompt,
+        )
+
+    def list(
+        self,
+        *,
+        workspace: str | None = None,
+        filter: PromptFilterParam | Omit = omit,
+        page: int | Omit = omit,
+        page_size: int | Omit = omit,
+        sort: PromptSort | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> SyncDefaultPagination[Prompt]:
+        """
+        Fetch the prompts that belong to one workspace as a paginated result.
+
+        Args:
+          workspace: Workspace to list prompts from. When `None`, the value is obtained from
+              the client via `_get_workspace_path_param()`.
+
+          filter: Filter prompts by workspace, project, name, description, created_at, and
+              updated_at.
+
+          page: Page number.
+
+          page_size: Page size.
+
+          sort: The field to sort by. To sort in decreasing order, use `-` in front of the field
+              name.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value.
+        """
+        workspace = self._client._get_workspace_path_param() if workspace is None else workspace
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+
+        # Built in the same order as the nested call would evaluate them: path, query, options.
+        url = path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace)
+        query_params = maybe_transform(
+            {"filter": filter, "page": page, "page_size": page_size, "sort": sort},
+            prompt_list_params.PromptListParams,
+        )
+        request_options = make_request_options(
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+            query=query_params,
+        )
+        return self._get_api_list(
+            url,
+            page=SyncDefaultPagination[Prompt],
+            options=request_options,
+            model=Prompt,
+        )
+
+    def delete(
+        self,
+        name: str,
+        *,
+        workspace: str | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> None:
+        """
+        Remove the prompt identified by workspace and name.
+
+        Args:
+          name: Name of the prompt to delete; must be non-empty.
+
+          workspace: Workspace that owns the prompt. When `None`, the value is obtained from
+              the client via `_get_workspace_path_param()`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value or `name` is empty.
+        """
+        workspace = self._client._get_workspace_path_param() if workspace is None else workspace
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        if not name:
+            raise ValueError(f"Expected a non-empty value for `name` but received {name!r}")
+
+        # A wildcard Accept is the default; a caller-supplied Accept header overrides it.
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        url = path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name)
+        request_options = make_request_options(
+            extra_headers=extra_headers,
+            extra_query=extra_query,
+            extra_body=extra_body,
+            timeout=timeout,
+        )
+        return self._delete(url, options=request_options, cast_to=NoneType)
+
+
+class AsyncPromptsResource(AsyncAPIResource):
+    """Asynchronous resource for the workspace-scoped prompts endpoints.
+
+    Every method targets `/apis/models/v2/workspaces/{workspace}/prompts` (or a
+    single prompt below it). When `workspace` is not passed, it is resolved
+    through the client's `_get_workspace_path_param()`.
+    """
+
+    @cached_property
+    def with_raw_response(self) -> AsyncPromptsResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#accessing-raw-response-data-e-g-headers
+        """
+        return AsyncPromptsResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncPromptsResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#with_streaming_response
+        """
+        return AsyncPromptsResourceWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        workspace: str | None = None,
+        name: str,
+        description: str | Omit = omit,
+        inference_params: InferenceParams | Omit = omit,
+        input_variables: SequenceNotStr[str] | Omit = omit,
+        messages: Iterable[PromptMessageParam] | Omit = omit,
+        project: str | Omit = omit,
+        response_format: Dict[str, object] | Omit = omit,
+        tags: SequenceNotStr[str] | Omit = omit,
+        tool_choice: Union[str, Dict[str, object]] | Omit = omit,
+        tools: Iterable[ChatCompletionToolParam] | Omit = omit,
+        exist_ok: bool = False,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Prompt:
+        """Create a new prompt.
+
+        Args:
+          workspace: Workspace to create the prompt in. When `None`, the value is obtained
+              from the client via `_get_workspace_path_param()`.
+
+          name: Name of the prompt.
+
+              Allowed characters: letters (a-z, A-Z), digits (0-9),
+              underscores, hyphens, and dots.
+
+          inference_params: Parameters for model inference. Extra fields can be supplied for additional
+              options applied to the inference request directly. Fields not supported by the
+              model may cause inference errors during evaluation.
+
+          project: The URN of the project associated with this prompt.
+
+          exist_ok: Do not raise an error if the resource already exists. Returns the existing resource.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value.
+          ConflictError: If the create request conflicts and `exist_ok` is false.
+        """
+        # Validate before entering the `try` so it only guards the request itself.
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        try:
+            return await self._post(
+                path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace),
+                body=await async_maybe_transform(
+                    {
+                        "name": name,
+                        "description": description,
+                        "inference_params": inference_params,
+                        "input_variables": input_variables,
+                        "messages": messages,
+                        "project": project,
+                        "response_format": response_format,
+                        "tags": tags,
+                        "tool_choice": tool_choice,
+                        "tools": tools,
+                    },
+                    prompt_create_params.PromptCreateParams,
+                ),
+                options=make_request_options(
+                    extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+                ),
+                cast_to=Prompt,
+            )
+        except ConflictError:
+            if not exist_ok:
+                raise
+            # Fall back to fetching the existing prompt. The caller's per-request headers,
+            # query parameters and timeout are forwarded so the follow-up GET is sent with
+            # the same options as the POST; `extra_body` is not, since it only described
+            # the create payload.
+            return await self.retrieve(
+                name=name,
+                workspace=workspace,
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                timeout=timeout,
+            )
+
+    async def retrieve(
+        self,
+        name: str,
+        *,
+        workspace: str | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Prompt:
+        """
+        Get a prompt by workspace and name.
+
+        Args:
+          name: Name of the prompt to fetch; must be non-empty.
+
+          workspace: Workspace that owns the prompt. When `None`, the value is obtained from
+              the client via `_get_workspace_path_param()`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value or `name` is empty.
+        """
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        if not name:
+            raise ValueError(f"Expected a non-empty value for `name` but received {name!r}")
+        return await self._get(
+            path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Prompt,
+        )
+
+    async def update(
+        self,
+        name: str,
+        *,
+        workspace: str | None = None,
+        description: str | Omit = omit,
+        inference_params: InferenceParams | Omit = omit,
+        input_variables: SequenceNotStr[str] | Omit = omit,
+        messages: Iterable[PromptMessageParam] | Omit = omit,
+        project: str | Omit = omit,
+        response_format: Dict[str, object] | Omit = omit,
+        tags: SequenceNotStr[str] | Omit = omit,
+        tool_choice: Union[str, Dict[str, object]] | Omit = omit,
+        tools: Iterable[ChatCompletionToolParam] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> Prompt:
+        """
+        Update an existing prompt (full replacement of mutable fields).
+
+        Args:
+          name: Name of the prompt to update; must be non-empty.
+
+          workspace: Workspace that owns the prompt. When `None`, the value is obtained from
+              the client via `_get_workspace_path_param()`.
+
+          inference_params: Parameters for model inference. Extra fields can be supplied for additional
+              options applied to the inference request directly. Fields not supported by the
+              model may cause inference errors during evaluation.
+
+          project: The URN of the project associated with this prompt.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value or `name` is empty.
+        """
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        if not name:
+            raise ValueError(f"Expected a non-empty value for `name` but received {name!r}")
+        return await self._put(
+            path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name),
+            body=await async_maybe_transform(
+                {
+                    "description": description,
+                    "inference_params": inference_params,
+                    "input_variables": input_variables,
+                    "messages": messages,
+                    "project": project,
+                    "response_format": response_format,
+                    "tags": tags,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                },
+                prompt_update_params.PromptUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Prompt,
+        )
+
+    def list(
+        self,
+        *,
+        workspace: str | None = None,
+        filter: PromptFilterParam | Omit = omit,
+        page: int | Omit = omit,
+        page_size: int | Omit = omit,
+        sort: PromptSort | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> AsyncPaginator[Prompt, AsyncDefaultPagination[Prompt]]:
+        """
+        List prompts for a specific workspace.
+
+        This is a plain (non-`async`) method: it returns the paginator object
+        produced by `_get_api_list` rather than a coroutine.
+
+        Args:
+          workspace: Workspace to list prompts from. When `None`, the value is obtained from
+              the client via `_get_workspace_path_param()`.
+
+          filter: Filter prompts by workspace, project, name, description, created_at, and
+              updated_at.
+
+          page: Page number.
+
+          page_size: Page size.
+
+          sort: The field to sort by. To sort in decreasing order, use `-` in front of the field
+              name.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value.
+        """
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        return self._get_api_list(
+            path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace),
+            page=AsyncDefaultPagination[Prompt],
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "filter": filter,
+                        "page": page,
+                        "page_size": page_size,
+                        "sort": sort,
+                    },
+                    prompt_list_params.PromptListParams,
+                ),
+            ),
+            model=Prompt,
+        )
+
+    async def delete(
+        self,
+        name: str,
+        *,
+        workspace: str | None = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> None:
+        """
+        Delete a prompt by workspace and name.
+
+        Args:
+          name: Name of the prompt to delete; must be non-empty.
+
+          workspace: Workspace that owns the prompt. When `None`, the value is obtained from
+              the client via `_get_workspace_path_param()`.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+
+        Raises:
+          ValueError: If `workspace` resolves to an empty value or `name` is empty.
+        """
+        if workspace is None:
+            workspace = self._client._get_workspace_path_param()
+        if not workspace:
+            raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}")
+        if not name:
+            raise ValueError(f"Expected a non-empty value for `name` but received {name!r}")
+        # A wildcard Accept is the default; a caller-supplied Accept header overrides it.
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return await self._delete(
+            path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
+        )
+
+
+class PromptsResourceWithRawResponse:
+    """Exposes each `PromptsResource` method passed through `to_raw_response_wrapper`."""
+
+    def __init__(self, prompts: PromptsResource) -> None:
+        resource = prompts
+        self._prompts = resource
+
+        # One wrapped attribute per endpoint method, published under the same name.
+        self.create = to_raw_response_wrapper(resource.create)
+        self.retrieve = to_raw_response_wrapper(resource.retrieve)
+        self.update = to_raw_response_wrapper(resource.update)
+        self.list = to_raw_response_wrapper(resource.list)
+        self.delete = to_raw_response_wrapper(resource.delete)
+
+
+class AsyncPromptsResourceWithRawResponse:
+    """Exposes each `AsyncPromptsResource` method passed through `async_to_raw_response_wrapper`."""
+
+    def __init__(self, prompts: AsyncPromptsResource) -> None:
+        resource = prompts
+        self._prompts = resource
+
+        # One wrapped attribute per endpoint method, published under the same name.
+        self.create = async_to_raw_response_wrapper(resource.create)
+        self.retrieve = async_to_raw_response_wrapper(resource.retrieve)
+        self.update = async_to_raw_response_wrapper(resource.update)
+        self.list = async_to_raw_response_wrapper(resource.list)
+        self.delete = async_to_raw_response_wrapper(resource.delete)
+
+
+class PromptsResourceWithStreamingResponse:
+    """Exposes each `PromptsResource` method passed through `to_streamed_response_wrapper`."""
+
+    def __init__(self, prompts: PromptsResource) -> None:
+        resource = prompts
+        self._prompts = resource
+
+        # One wrapped attribute per endpoint method, published under the same name.
+        self.create = to_streamed_response_wrapper(resource.create)
+        self.retrieve = to_streamed_response_wrapper(resource.retrieve)
+        self.update = to_streamed_response_wrapper(resource.update)
+        self.list = to_streamed_response_wrapper(resource.list)
+        self.delete = to_streamed_response_wrapper(resource.delete)
+
+
+class AsyncPromptsResourceWithStreamingResponse:
+    """Exposes each `AsyncPromptsResource` method passed through `async_to_streamed_response_wrapper`."""
+
+    def __init__(self, prompts: AsyncPromptsResource) -> None:
+        resource = prompts
+        self._prompts = resource
+
+        # One wrapped attribute per endpoint method, published under the same name.
+        self.create = async_to_streamed_response_wrapper(resource.create)
+        self.retrieve = async_to_streamed_response_wrapper(resource.retrieve)
+        self.update = async_to_streamed_response_wrapper(resource.update)
+        self.list = async_to_streamed_response_wrapper(resource.list)
+        self.delete = async_to_streamed_response_wrapper(resource.delete)
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md
index 99f688895d..f5bfd41e0c 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md
@@ -8,8 +8,10 @@ from nemo_platform.types.jobs import (
     ComputeResources,
     ContainerSpec,
     CPUExecutionProvider,
+    CPUExecutionProviderParam,
     CreatePlatformJobRequest,
     DistributedGPUExecutionProvider,
+    DistributedGPUExecutionProviderParam,
     DockerJobExecutionProfile,
     DockerJobExecutionProfileConfig,
     DockerJobNetworkConfig,
@@ -17,6 +19,7 @@ from nemo_platform.types.jobs import (
     DockerVolumeMount,
     E2EJobExecutionProfile,
     GPUExecutionProvider,
+    GPUExecutionProviderParam,
     ImagePullSecret,
     JobExecutionProfileConfig,
     KubernetesEmptyDirVolume,
@@ -33,7 +36,9 @@ from nemo_platform.types.jobs import (
     PlatformJobSecretEnvironmentVariableRef,
     PlatformJobSortField,
     PlatformJobSpec,
+    PlatformJobSpecParam,
     PlatformJobStepSpec,
+    PlatformJobStepSpecParam,
     PlatformJobsListFilter,
     StepLifecycle,
     SubprocessExecutionProvider,
diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py
index 3a7aff2408..9c5652b216 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py
@@ -58,6 +58,7 @@
 from ...pagination import SyncLogsPagination, AsyncLogsPagination, SyncDefaultPagination, AsyncDefaultPagination
 from ...types.jobs import (
     PlatformJobSortField,
+    PlatformJobSpecParam,
     job_list_params,
     job_create_params,
     job_get_logs_params,
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py
index 2d670dadaf..fafcd134f4 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py
@@ -32,7 +32,6 @@
     PlatformJobLog as PlatformJobLog,
     ToolCallConfig as ToolCallConfig,
     APIEndpointData as APIEndpointData,
-    FilesetMetadata as FilesetMetadata,
     FileStorageType as FileStorageType,
     InferenceParams as InferenceParams,
     LinearLayerSpec as LinearLayerSpec,
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py
index 3833c1d785..b76dd4a694 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py
@@ -22,6 +22,7 @@
 from .cache_status import CacheStatus as CacheStatus
 from .fileset_file import FilesetFile as FilesetFile
 from .fileset_purpose import FilesetPurpose as FilesetPurpose
+from .fileset_metadata import FilesetMetadata as FilesetMetadata
 from .s3_storage_config import S3StorageConfig as S3StorageConfig
 from .ngc_storage_config import NGCStorageConfig as NGCStorageConfig
 from .fileset_list_params import FilesetListParams as FilesetListParams
@@ -32,6 +33,7 @@
 from .fileset_create_params import FilesetCreateParams as FilesetCreateParams
 from .fileset_update_params import FilesetUpdateParams as FilesetUpdateParams
 from .file_list_files_params import FileListFilesParams as FileListFilesParams
+from .fileset_metadata_param import FilesetMetadataParam as FilesetMetadataParam
 from .file_upload_file_params import FileUploadFileParams as FileUploadFileParams
 from .s3_storage_config_param import S3StorageConfigParam as S3StorageConfigParam
 from .ngc_storage_config_param import NGCStorageConfigParam as NGCStorageConfigParam
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py
index 810d5ce990..e6d9642b7a 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py
@@ -20,10 +20,10 @@
 
 from ..._models import BaseModel
 from .fileset_purpose import FilesetPurpose
+from .fileset_metadata import FilesetMetadata
 from .s3_storage_config import S3StorageConfig
 from .ngc_storage_config import NGCStorageConfig
 from .local_storage_config import LocalStorageConfig
-from ..shared.fileset_metadata import FilesetMetadata
 from .huggingface_storage_config import HuggingfaceStorageConfig
 
 __all__ = ["Fileset", "Storage"]
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py
index 06715b1c74..ccab3462e8 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py
@@ -21,10 +21,10 @@
 from typing_extensions import Required, TypeAlias, TypedDict
 
 from .fileset_purpose import FilesetPurpose
+from .fileset_metadata_param import FilesetMetadataParam
 from .s3_storage_config_param import S3StorageConfigParam
 from .ngc_storage_config_param import NGCStorageConfigParam
 from .local_storage_config_param import LocalStorageConfigParam
-from ..shared_params.fileset_metadata import FilesetMetadata
 from .huggingface_storage_config_param import HuggingfaceStorageConfigParam
 
 __all__ = ["FilesetCreateParams", "Storage"]
@@ -49,7 +49,7 @@ class FilesetCreateParams(TypedDict, total=False):
     description: str
     """The description of the fileset."""
 
-    metadata: FilesetMetadata
+    metadata: FilesetMetadataParam
     """Tagged metadata container - the key indicates the type.
 
     Example: metadata = FilesetMetadata( dataset=DatasetMetadataContent(
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py
similarity index 91%
rename from sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py
rename to sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py
index b35b6d8ecc..36573bd374 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py
@@ -18,8 +18,8 @@
 from typing import Optional
 
 from ..._models import BaseModel
-from .model_metadata_content import ModelMetadataContent
-from .dataset_metadata_content import DatasetMetadataContent
+from ..shared.model_metadata_content import ModelMetadataContent
+from ..shared.dataset_metadata_content import DatasetMetadataContent
 
 __all__ = ["FilesetMetadata"]
 
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py
similarity index 85%
rename from sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata.py
rename to sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py
index d53a643b0d..66f37de921 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py
@@ -19,13 +19,13 @@
 
 from typing_extensions import TypedDict
 
-from .model_metadata_content import ModelMetadataContent
-from .dataset_metadata_content import DatasetMetadataContent
+from ..shared_params.model_metadata_content import ModelMetadataContent
+from ..shared_params.dataset_metadata_content import DatasetMetadataContent
 
-__all__ = ["FilesetMetadata"]
+__all__ = ["FilesetMetadataParam"]
 
 
-class FilesetMetadata(TypedDict, total=False):
+class FilesetMetadataParam(TypedDict, total=False):
     """Tagged metadata container - the key indicates the type.
 
     Example:
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py
index 3f8699dda8..0b389fd318 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py
@@ -21,7 +21,7 @@
 from typing_extensions import TypedDict
 
 from .fileset_purpose import FilesetPurpose
-from ..shared_params.fileset_metadata import FilesetMetadata
+from .fileset_metadata_param import FilesetMetadataParam
 
 __all__ = ["FilesetUpdateParams"]
 
@@ -35,7 +35,7 @@ class FilesetUpdateParams(TypedDict, total=False):
     description: str
     """The description of the fileset."""
 
-    metadata: FilesetMetadata
+    metadata: FilesetMetadataParam
     """Tagged metadata container - the key indicates the type.
 
     Example: metadata = FilesetMetadata( dataset=DatasetMetadataContent(
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py
index a8b865241a..20356db654 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py
@@ -18,14 +18,26 @@
 from __future__ import annotations
 
 from .engine import Engine as Engine
+from .prompt import Prompt as Prompt
 from .model_type import ModelType as ModelType
+from .prompt_sort import PromptSort as PromptSort
+from .prompts_page import PromptsPage as PromptsPage
 from .virtual_model import VirtualModel as VirtualModel
 from .model_provider import ModelProvider as ModelProvider
+from .prompt_message import PromptMessage as PromptMessage
 from .middleware_call import MiddlewareCall as MiddlewareCall
 from .model_deployment import ModelDeployment as ModelDeployment
+from .prompt_list_params import PromptListParams as PromptListParams
+from .function_definition import FunctionDefinition as FunctionDefinition
 from .model_provider_sort import ModelProviderSort as ModelProviderSort
+from .prompt_filter_param import PromptFilterParam as PromptFilterParam
+from .prompt_message_role import PromptMessageRole as PromptMessageRole
 from .virtual_models_page import VirtualModelsPage as VirtualModelsPage
+from .chat_completion_tool import ChatCompletionTool as ChatCompletionTool
 from .model_providers_page import ModelProvidersPage as ModelProvidersPage
+from .prompt_create_params import PromptCreateParams as PromptCreateParams
+from .prompt_message_param import PromptMessageParam as PromptMessageParam
+from .prompt_update_params import PromptUpdateParams as PromptUpdateParams
 from .provider_list_params import ProviderListParams as ProviderListParams
 from .served_model_mapping import ServedModelMapping as ServedModelMapping
 from .middleware_call_param import MiddlewareCallParam as MiddlewareCallParam
@@ -40,7 +52,9 @@
 from .deployment_create_params import DeploymentCreateParams as DeploymentCreateParams
 from .deployment_update_params import DeploymentUpdateParams as DeploymentUpdateParams
 from .container_executor_config import ContainerExecutorConfig as ContainerExecutorConfig
+from .function_definition_param import FunctionDefinitionParam as FunctionDefinitionParam
 from .virtual_model_list_params import VirtualModelListParams as VirtualModelListParams
+from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam
 from .served_model_mapping_param import ServedModelMappingParam as ServedModelMappingParam
 from .virtual_model_patch_params import VirtualModelPatchParams as VirtualModelPatchParams
 from .model_provider_filter_param import ModelProviderFilterParam as ModelProviderFilterParam
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py
new file mode 100644
index 0000000000..c0d66c32a2
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .function_definition import FunctionDefinition
+
+__all__ = ["ChatCompletionTool"]
+
+
+class ChatCompletionTool(BaseModel):
+    """An OpenAI-compatible tool definition (currently always a function tool)."""
+
+    function: FunctionDefinition
+    """An OpenAI-compatible function definition for tool calling.
+
+    Mirrors the `function` object the Inference Gateway forwards to
+    OpenAI-compatible backends.
+    """
+
+    type: Literal["function"]
+    """The type of the tool. Currently only 'function' is supported."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py
new file mode 100644
index 0000000000..3275a2236e
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+from .function_definition_param import FunctionDefinitionParam
+
+__all__ = ["ChatCompletionToolParam"]
+
+
+class ChatCompletionToolParam(TypedDict, total=False):
+    """An OpenAI-compatible tool definition (currently always a function tool)."""
+
+    function: Required[FunctionDefinitionParam]
+    """An OpenAI-compatible function definition for tool calling.
+
+    Mirrors the `function` object the Inference Gateway forwards to
+    OpenAI-compatible backends.
+    """
+
+    type: Required[Literal["function"]]
+    """The type of the tool. Currently only 'function' is supported."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py
new file mode 100644
index 0000000000..1fdd2ceb03
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["FunctionDefinition"]
+
+
+class FunctionDefinition(BaseModel):
+    """An OpenAI-compatible function definition for tool calling.
+
+    Mirrors the ``function`` object the Inference Gateway forwards to
+    OpenAI-compatible backends.
+    """
+
+    name: str
+    """The name of the function to be called."""
+
+    description: Optional[str] = None
+    """
+    A description of what the function does, used by the model to decide when and
+    how to call it.
+    """
+
+    parameters: Optional[Dict[str, object]] = None
+    """The parameters the function accepts, described as a JSON Schema object."""
+
+    strict: Optional[bool] = None
+    """Whether to enforce strict schema adherence when generating the function call."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py
new file mode 100644
index 0000000000..d42fd2a983
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict
+from typing_extensions import Required, TypedDict
+
+__all__ = ["FunctionDefinitionParam"]
+
+
+class FunctionDefinitionParam(TypedDict, total=False):
+    """An OpenAI-compatible function definition for tool calling.
+
+    Mirrors the ``function`` object the Inference Gateway forwards to
+    OpenAI-compatible backends.
+    """
+
+    name: Required[str]
+    """The name of the function to be called."""
+
+    description: str
+    """
+    A description of what the function does, used by the model to decide when and
+    how to call it.
+    """
+
+    parameters: Dict[str, object]
+    """The parameters the function accepts, described as a JSON Schema object."""
+
+    strict: bool
+    """Whether to enforce strict schema adherence when generating the function call."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py
new file mode 100644
index 0000000000..c16b94d2ec
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py
@@ -0,0 +1,96 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from datetime import datetime
+
+from ..._models import BaseModel
+from .prompt_message import PromptMessage
+from .chat_completion_tool import ChatCompletionTool
+from ..shared.inference_params import InferenceParams
+
+__all__ = ["Prompt"]
+
+
+class Prompt(BaseModel):
+    """A reusable, stored chat prompt.
+
+    A Prompt captures the messages, declared template variables, optional tool
+    definitions, and default inference parameters needed to invoke a model
+    through the Inference Gateway. The unique identifier is workspace/name.
+    """
+
+    created_at: datetime
+    """The timestamp of model entity creation"""
+
+    name: str
+    """Name of the entity.
+
+    Name/workspace combo must be unique across all entities. Allowed characters:
+    letters (a-z, A-Z), digits (0-9), underscores, hyphens, and dots.
+    """
+
+    updated_at: datetime
+    """The timestamp of the last model entity update"""
+
+    workspace: str
+    """The workspace of the entity.
+
+    Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, hyphens, and
+    dots.
+    """
+
+    id: Optional[str] = None
+    """Unique identifier for the prompt."""
+
+    description: Optional[str] = None
+    """Optional description of the prompt."""
+
+    inference_params: Optional[InferenceParams] = None
+    """Parameters for model inference.
+
+    Extra fields can be supplied for additional options applied to the inference
+    request directly. Fields not supported by the model may cause inference errors
+    during evaluation.
+    """
+
+    input_variables: Optional[List[str]] = None
+    """Names of the Jinja2 template variables the prompt expects."""
+
+    messages: Optional[List[PromptMessage]] = None
+    """Ordered list of chat messages that make up the prompt."""
+
+    project: Optional[str] = None
+    """The URN of the project associated with this entity."""
+
+    response_format: Optional[Dict[str, object]] = None
+    """
+    Optional OpenAI-compatible response_format, e.g. a json_schema
+    structured-output spec.
+    """
+
+    tags: Optional[List[str]] = None
+    """Optional free-form tags for organizing prompts."""
+
+    tool_choice: Union[str, Dict[str, object], None] = None
+    """
+    Controls which (if any) tool is called: 'none', 'auto', 'required', or a
+    named-tool object.
+    """
+
+    tools: Optional[List[ChatCompletionTool]] = None
+    """Optional OpenAI-compatible tool definitions to send with the prompt."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py
new file mode 100644
index 0000000000..66960a3ca9
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py
@@ -0,0 +1,64 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Required, TypedDict
+
+from ..._types import SequenceNotStr
+from .prompt_message_param import PromptMessageParam
+from .chat_completion_tool_param import ChatCompletionToolParam
+from ..shared_params.inference_params import InferenceParams
+
+__all__ = ["PromptCreateParams"]
+
+
+class PromptCreateParams(TypedDict, total=False):
+    workspace: str
+
+    name: Required[str]
+    """Name of the prompt.
+
+    Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, hyphens, and
+    dots.
+    """
+
+    description: str
+
+    inference_params: InferenceParams
+    """Parameters for model inference.
+
+    Extra fields can be supplied for additional options applied to the inference
+    request directly. Fields not supported by the model may cause inference errors
+    during evaluation.
+    """
+
+    input_variables: SequenceNotStr[str]
+
+    messages: Iterable[PromptMessageParam]
+
+    project: str
+    """The URN of the project associated with this prompt."""
+
+    response_format: Dict[str, object]
+
+    tags: SequenceNotStr[str]
+
+    tool_choice: Union[str, Dict[str, object]]
+
+    tools: Iterable[ChatCompletionToolParam]
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py
new file mode 100644
index 0000000000..55bcc70223
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+from ..shared_params.datetime_filter import DatetimeFilter
+
+__all__ = ["PromptFilterParam"]
+
+
+class PromptFilterParam(TypedDict, total=False):
+    """Filter for Prompt queries."""
+
+    created_at: DatetimeFilter
+    """Filter by creation date."""
+
+    description: str
+    """Filter by description."""
+
+    name: str
+    """Filter by name."""
+
+    project: str
+    """Filter by project URN."""
+
+    updated_at: DatetimeFilter
+    """Filter by update date."""
+
+    workspace: str
+    """Filter by workspace."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py
new file mode 100644
index 0000000000..a9e8d1629d
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py
@@ -0,0 +1,47 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+from .prompt_sort import PromptSort
+from .prompt_filter_param import PromptFilterParam
+
+__all__ = ["PromptListParams"]
+
+
+class PromptListParams(TypedDict, total=False):
+    workspace: str
+
+    filter: PromptFilterParam
+    """
+    Filter prompts by workspace, project, name, description, created_at, and
+    updated_at.
+    """
+
+    page: int
+    """Page number."""
+
+    page_size: int
+    """Page size."""
+
+    sort: PromptSort
+    """The field to sort by.
+
+    To sort in decreasing order, use `-` in front of the field name.
+    """
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py
new file mode 100644
index 0000000000..255845e5c1
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+from .prompt_message_role import PromptMessageRole
+
+__all__ = ["PromptMessage"]
+
+
+class PromptMessage(BaseModel):
+    """A single templated message in a chat prompt.
+
+    ``content`` is a Jinja2 template body that may reference the prompt's
+    declared ``input_variables`` (e.g. ``{{ topic }}``).
+    """
+
+    content: str
+    """Templated message content. May contain template variables."""
+
+    role: PromptMessageRole
+    """Role of a message author in a chat prompt.
+
+    Follows the OpenAI chat schema the Inference Gateway speaks
+    (`/v1/chat/completions`).
+    """
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py
new file mode 100644
index 0000000000..a861ed6e10
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py
@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from .prompt_message_role import PromptMessageRole
+
+__all__ = ["PromptMessageParam"]
+
+
+class PromptMessageParam(TypedDict, total=False):
+    """A single templated message in a chat prompt.
+
+    ``content`` is a Jinja2 template body that may reference the prompt's
+    declared ``input_variables`` (e.g. ``{{ topic }}``).
+    """
+
+    content: Required[str]
+    """Templated message content. May contain template variables."""
+
+    role: Required[PromptMessageRole]
+    """Role of a message author in a chat prompt.
+
+    Follows the OpenAI chat schema the Inference Gateway speaks
+    (`/v1/chat/completions`).
+    """
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py
new file mode 100644
index 0000000000..9ce7d3e39b
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["PromptMessageRole"]
+
+PromptMessageRole: TypeAlias = Literal["system", "developer", "user", "assistant"]
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py
new file mode 100644
index 0000000000..f158f56b59
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal, TypeAlias
+
+__all__ = ["PromptSort"]
+
+PromptSort: TypeAlias = Literal["name", "-name", "created_at", "-created_at", "updated_at", "-updated_at"]
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py
new file mode 100644
index 0000000000..c93613b7cc
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import TypedDict
+
+from ..._types import SequenceNotStr
+from .prompt_message_param import PromptMessageParam
+from .chat_completion_tool_param import ChatCompletionToolParam
+from ..shared_params.inference_params import InferenceParams
+
+__all__ = ["PromptUpdateParams"]
+
+
+class PromptUpdateParams(TypedDict, total=False):
+    workspace: str
+
+    description: str
+
+    inference_params: InferenceParams
+    """Parameters for model inference.
+
+    Extra fields can be supplied for additional options applied to the inference
+    request directly. Fields not supported by the model may cause inference errors
+    during evaluation.
+    """
+
+    input_variables: SequenceNotStr[str]
+
+    messages: Iterable[PromptMessageParam]
+
+    project: str
+    """The URN of the project associated with this prompt."""
+
+    response_format: Dict[str, object]
+
+    tags: SequenceNotStr[str]
+
+    tool_choice: Union[str, Dict[str, object]]
+
+    tools: Iterable[ChatCompletionToolParam]
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py
new file mode 100644
index 0000000000..e317a98336
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Optional
+
+from .prompt import Prompt
+from ..._models import BaseModel
+from ..shared.pagination_data import PaginationData
+
+__all__ = ["PromptsPage"]
+
+
+class PromptsPage(BaseModel):
+    data: List[Prompt]
+
+    filter: Optional[Dict[str, object]] = None
+    """Filtering information."""
+
+    pagination: Optional[PaginationData] = None
+    """Pagination information."""
+
+    sort: Optional[str] = None
+    """The field on which the results are sorted."""
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py
index e0178b4f49..7a667ef8b5 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py
@@ -26,7 +26,6 @@
 from .delete_response import DeleteResponse as DeleteResponse
 from .finetuning_type import FinetuningType as FinetuningType
 from .pagination_data import PaginationData as PaginationData
-from .fileset_metadata import FilesetMetadata as FilesetMetadata
 from .inference_params import InferenceParams as InferenceParams
 from .platform_job_log import PlatformJobLog as PlatformJobLog
 from .tool_call_config import ToolCallConfig as ToolCallConfig
diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py
index 449d6c5e14..f78dae8e90 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py
@@ -23,7 +23,6 @@
 from .backend_format import BackendFormat as BackendFormat
 from .datetime_filter import DatetimeFilter as DatetimeFilter
 from .finetuning_type import FinetuningType as FinetuningType
-from .fileset_metadata import FilesetMetadata as FilesetMetadata
 from .inference_params import InferenceParams as InferenceParams
 from .tool_call_config import ToolCallConfig as ToolCallConfig
 from .api_endpoint_data import APIEndpointData as APIEndpointData
diff --git a/sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py b/sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py
new file mode 100644
index 0000000000..ed9863015f
--- /dev/null
+++ b/sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py
@@ -0,0 +1,741 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from nemo_platform import NeMoPlatform, AsyncNeMoPlatform
+from nemo_platform._utils import parse_datetime
+from nemo_platform.pagination import SyncDefaultPagination, AsyncDefaultPagination
+from nemo_platform.types.inference import (
+    Prompt,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestPrompts:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_create(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.create(
+            workspace="workspace",
+            name="support-bot-system",
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_create_with_all_params(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.create(
+            workspace="workspace",
+            name="support-bot-system",
+            description="description",
+            inference_params={
+                "max_completion_tokens": 1,
+                "max_tokens": 1,
+                "model": "model",
+                "stop": ["string"],
+                "temperature": 0,
+                "top_p": 0,
+            },
+            input_variables=["string"],
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            project="project",
+            response_format={"foo": "bar"},
+            tags=["string"],
+            tool_choice="string",
+            tools=[
+                {
+                    "function": {
+                        "name": "name",
+                        "description": "description",
+                        "parameters": {"foo": "bar"},
+                        "strict": True,
+                    },
+                    "type": "function",
+                }
+            ],
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_raw_response_create(self, client: NeMoPlatform) -> None:
+        response = client.inference.prompts.with_raw_response.create(
+            workspace="workspace",
+            name="support-bot-system",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = response.parse()
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_streaming_response_create(self, client: NeMoPlatform) -> None:
+        with client.inference.prompts.with_streaming_response.create(
+            workspace="workspace",
+            name="support-bot-system",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = response.parse()
+            assert_matches_type(Prompt, prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_path_params_create(self, client: NeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            client.inference.prompts.with_raw_response.create(
+                workspace="",
+                name="support-bot-system",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_retrieve(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.retrieve(
+            name="name",
+            workspace="workspace",
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_raw_response_retrieve(self, client: NeMoPlatform) -> None:
+        response = client.inference.prompts.with_raw_response.retrieve(
+            name="name",
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = response.parse()
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_streaming_response_retrieve(self, client: NeMoPlatform) -> None:
+        with client.inference.prompts.with_streaming_response.retrieve(
+            name="name",
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = response.parse()
+            assert_matches_type(Prompt, prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_path_params_retrieve(self, client: NeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            client.inference.prompts.with_raw_response.retrieve(
+                name="name",
+                workspace="",
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"):
+            client.inference.prompts.with_raw_response.retrieve(
+                name="",
+                workspace="workspace",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_update(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.update(
+            name="name",
+            workspace="workspace",
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_update_with_all_params(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.update(
+            name="name",
+            workspace="workspace",
+            description="description",
+            inference_params={
+                "max_completion_tokens": 1,
+                "max_tokens": 1,
+                "model": "model",
+                "stop": ["string"],
+                "temperature": 0,
+                "top_p": 0,
+            },
+            input_variables=["string"],
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            project="project",
+            response_format={"foo": "bar"},
+            tags=["string"],
+            tool_choice="string",
+            tools=[
+                {
+                    "function": {
+                        "name": "name",
+                        "description": "description",
+                        "parameters": {"foo": "bar"},
+                        "strict": True,
+                    },
+                    "type": "function",
+                }
+            ],
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_raw_response_update(self, client: NeMoPlatform) -> None:
+        response = client.inference.prompts.with_raw_response.update(
+            name="name",
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = response.parse()
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_streaming_response_update(self, client: NeMoPlatform) -> None:
+        with client.inference.prompts.with_streaming_response.update(
+            name="name",
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = response.parse()
+            assert_matches_type(Prompt, prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_path_params_update(self, client: NeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            client.inference.prompts.with_raw_response.update(
+                name="name",
+                workspace="",
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"):
+            client.inference.prompts.with_raw_response.update(
+                name="",
+                workspace="workspace",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_list(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.list(
+            workspace="workspace",
+        )
+        assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_list_with_all_params(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.list(
+            workspace="workspace",
+            filter={
+                "created_at": {
+                    "gte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                    "lte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                },
+                "description": "description",
+                "name": "name",
+                "project": "project",
+                "updated_at": {
+                    "gte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                    "lte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                },
+                "workspace": "workspace",
+            },
+            page=1,
+            page_size=1,
+            sort="name",
+        )
+        assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_raw_response_list(self, client: NeMoPlatform) -> None:
+        response = client.inference.prompts.with_raw_response.list(
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = response.parse()
+        assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_streaming_response_list(self, client: NeMoPlatform) -> None:
+        with client.inference.prompts.with_streaming_response.list(
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = response.parse()
+            assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_path_params_list(self, client: NeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            client.inference.prompts.with_raw_response.list(
+                workspace="",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_method_delete(self, client: NeMoPlatform) -> None:
+        prompt = client.inference.prompts.delete(
+            name="name",
+            workspace="workspace",
+        )
+        assert prompt is None
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_raw_response_delete(self, client: NeMoPlatform) -> None:
+        response = client.inference.prompts.with_raw_response.delete(
+            name="name",
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = response.parse()
+        assert prompt is None
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_streaming_response_delete(self, client: NeMoPlatform) -> None:
+        with client.inference.prompts.with_streaming_response.delete(
+            name="name",
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = response.parse()
+            assert prompt is None
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    def test_path_params_delete(self, client: NeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            client.inference.prompts.with_raw_response.delete(
+                name="name",
+                workspace="",
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"):
+            client.inference.prompts.with_raw_response.delete(
+                name="",
+                workspace="workspace",
+            )
+
+
+class TestAsyncPrompts:
+    parametrize = pytest.mark.parametrize(
+        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+    )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_create(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.create(
+            workspace="workspace",
+            name="support-bot-system",
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_create_with_all_params(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.create(
+            workspace="workspace",
+            name="support-bot-system",
+            description="description",
+            inference_params={
+                "max_completion_tokens": 1,
+                "max_tokens": 1,
+                "model": "model",
+                "stop": ["string"],
+                "temperature": 0,
+                "top_p": 0,
+            },
+            input_variables=["string"],
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            project="project",
+            response_format={"foo": "bar"},
+            tags=["string"],
+            tool_choice="string",
+            tools=[
+                {
+                    "function": {
+                        "name": "name",
+                        "description": "description",
+                        "parameters": {"foo": "bar"},
+                        "strict": True,
+                    },
+                    "type": "function",
+                }
+            ],
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncNeMoPlatform) -> None:
+        response = await async_client.inference.prompts.with_raw_response.create(
+            workspace="workspace",
+            name="support-bot-system",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = await response.parse()
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncNeMoPlatform) -> None:
+        async with async_client.inference.prompts.with_streaming_response.create(
+            workspace="workspace",
+            name="support-bot-system",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = await response.parse()
+            assert_matches_type(Prompt, prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_path_params_create(self, async_client: AsyncNeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            await async_client.inference.prompts.with_raw_response.create(
+                workspace="",
+                name="support-bot-system",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_retrieve(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.retrieve(
+            name="name",
+            workspace="workspace",
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_raw_response_retrieve(self, async_client: AsyncNeMoPlatform) -> None:
+        response = await async_client.inference.prompts.with_raw_response.retrieve(
+            name="name",
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = await response.parse()
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_streaming_response_retrieve(self, async_client: AsyncNeMoPlatform) -> None:
+        async with async_client.inference.prompts.with_streaming_response.retrieve(
+            name="name",
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = await response.parse()
+            assert_matches_type(Prompt, prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_path_params_retrieve(self, async_client: AsyncNeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            await async_client.inference.prompts.with_raw_response.retrieve(
+                name="name",
+                workspace="",
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"):
+            await async_client.inference.prompts.with_raw_response.retrieve(
+                name="",
+                workspace="workspace",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_update(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.update(
+            name="name",
+            workspace="workspace",
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_update_with_all_params(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.update(
+            name="name",
+            workspace="workspace",
+            description="description",
+            inference_params={
+                "max_completion_tokens": 1,
+                "max_tokens": 1,
+                "model": "model",
+                "stop": ["string"],
+                "temperature": 0,
+                "top_p": 0,
+            },
+            input_variables=["string"],
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            project="project",
+            response_format={"foo": "bar"},
+            tags=["string"],
+            tool_choice="string",
+            tools=[
+                {
+                    "function": {
+                        "name": "name",
+                        "description": "description",
+                        "parameters": {"foo": "bar"},
+                        "strict": True,
+                    },
+                    "type": "function",
+                }
+            ],
+        )
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_raw_response_update(self, async_client: AsyncNeMoPlatform) -> None:
+        response = await async_client.inference.prompts.with_raw_response.update(
+            name="name",
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = await response.parse()
+        assert_matches_type(Prompt, prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_streaming_response_update(self, async_client: AsyncNeMoPlatform) -> None:
+        async with async_client.inference.prompts.with_streaming_response.update(
+            name="name",
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = await response.parse()
+            assert_matches_type(Prompt, prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_path_params_update(self, async_client: AsyncNeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            await async_client.inference.prompts.with_raw_response.update(
+                name="name",
+                workspace="",
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"):
+            await async_client.inference.prompts.with_raw_response.update(
+                name="",
+                workspace="workspace",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_list(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.list(
+            workspace="workspace",
+        )
+        assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_list_with_all_params(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.list(
+            workspace="workspace",
+            filter={
+                "created_at": {
+                    "gte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                    "lte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                },
+                "description": "description",
+                "name": "name",
+                "project": "project",
+                "updated_at": {
+                    "gte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                    "lte": parse_datetime("2019-12-27T18:11:19.117Z"),
+                },
+                "workspace": "workspace",
+            },
+            page=1,
+            page_size=1,
+            sort="name",
+        )
+        assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_raw_response_list(self, async_client: AsyncNeMoPlatform) -> None:
+        response = await async_client.inference.prompts.with_raw_response.list(
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = await response.parse()
+        assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"])
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_streaming_response_list(self, async_client: AsyncNeMoPlatform) -> None:
+        async with async_client.inference.prompts.with_streaming_response.list(
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = await response.parse()
+            assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_path_params_list(self, async_client: AsyncNeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            await async_client.inference.prompts.with_raw_response.list(
+                workspace="",
+            )
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_method_delete(self, async_client: AsyncNeMoPlatform) -> None:
+        prompt = await async_client.inference.prompts.delete(
+            name="name",
+            workspace="workspace",
+        )
+        assert prompt is None
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_raw_response_delete(self, async_client: AsyncNeMoPlatform) -> None:
+        response = await async_client.inference.prompts.with_raw_response.delete(
+            name="name",
+            workspace="workspace",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        prompt = await response.parse()
+        assert prompt is None
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_streaming_response_delete(self, async_client: AsyncNeMoPlatform) -> None:
+        async with async_client.inference.prompts.with_streaming_response.delete(
+            name="name",
+            workspace="workspace",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            prompt = await response.parse()
+            assert prompt is None
+
+        assert cast(Any, response.is_closed) is True
+
+    @pytest.mark.skip(reason="Mock server tests are disabled")
+    @parametrize
+    async def test_path_params_delete(self, async_client: AsyncNeMoPlatform) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"):
+            await async_client.inference.prompts.with_raw_response.delete(
+                name="name",
+                workspace="",
+            )
+
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"):
+            await async_client.inference.prompts.with_raw_response.delete(
+                name="",
+                workspace="workspace",
+            )
diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml
index 9da6993d54..e96b34b9e4 100644
--- a/sdk/stainless.yaml
+++ b/sdk/stainless.yaml
@@ -26,16 +26,16 @@ custom_casings:
     initialism: true
 
 targets:
-#  typescript:
-#    _skip_running_tests: false
-#    package_name: nemo-platform-v1
-#    publish:
-#      npm: false
-#    skip: false
-#    options:
-#      mcp_server:
-#        package_name: nemo-platform-v1-mcp
-#        enable_all_resources: true
+  #  typescript:
+  #    _skip_running_tests: false
+  #    package_name: nemo-platform-v1
+  #    publish:
+  #      npm: false
+  #    skip: false
+  #    options:
+  #      mcp_server:
+  #        package_name: nemo-platform-v1-mcp
+  #        enable_all_resources: true
 
   python:
     # [docs]: https://www.stainless.com/docs/reference/editions
@@ -81,86 +81,86 @@ client_settings:
 # `pagination` defines [pagination schemes] which provides a template to match
 # endpoints and generate next-page and auto-pagination helpers in the SDKs.
 pagination:
-- name: default_pagination
-  type: page_number
-  request:
-    page:
-      type: integer
-      x-stainless-pagination-property:
-        purpose: page_number_param
-    page_size:
-      type: integer
-  response:
-    data:
-      type: array
-      x-stainless-pagination-property:
-        purpose: items
-      items:
+  - name: default_pagination
+    type: page_number
+    request:
+      page:
+        type: integer
+        x-stainless-pagination-property:
+          purpose: page_number_param
+      page_size:
+        type: integer
+    response:
+      data:
+        type: array
+        x-stainless-pagination-property:
+          purpose: items
+        items:
+          type: object
+          additionalProperties: true
+      pagination:
         type: object
-        additionalProperties: true
-    pagination:
-      type: object
-      properties:
-        page:
-          type: integer
-          title: Page
-          description: The current page number.
-          x-stainless-pagination-property:
-            purpose: current_page_number_field
-        page_size:
-          type: integer
-          title: Page Size
-          description: The page size used for the query.
-        current_page_size:
-          type: integer
-          title: Current Page Size
-          description: The size for the current page.
-        total_pages:
-          type: integer
-          title: Total Pages
-          description: The total number of pages.
-          x-stainless-pagination-property:
-            purpose: total_page_count_field
-        total_results:
-          type: integer
-          title: Total Results
-          description: The total number of results.
-      required:
-      - page
-      - page_size
-      - total_pages
-      - total_results
-      - current_page_size
-- name: logs_pagination
-  type: cursor
-  request:
-    limit:
-      type: integer
-    page_cursor:
-      type: string
-      x-stainless-pagination-property:
-        purpose: next_cursor_param
-  response:
-    data:
-      type: array
-      x-stainless-pagination-property:
-        purpose: items
-      items:
-        type: object
-        additionalProperties: true
-    next_page:
-      type: string
-      x-stainless-pagination-property:
-        purpose: next_cursor_field
+        properties:
+          page:
+            type: integer
+            title: Page
+            description: The current page number.
+            x-stainless-pagination-property:
+              purpose: current_page_number_field
+          page_size:
+            type: integer
+            title: Page Size
+            description: The page size used for the query.
+          current_page_size:
+            type: integer
+            title: Current Page Size
+            description: The size for the current page.
+          total_pages:
+            type: integer
+            title: Total Pages
+            description: The total number of pages.
+            x-stainless-pagination-property:
+              purpose: total_page_count_field
+          total_results:
+            type: integer
+            title: Total Results
+            description: The total number of results.
+        required:
+          - page
+          - page_size
+          - total_pages
+          - total_results
+          - current_page_size
+  - name: logs_pagination
+    type: cursor
+    request:
+      limit:
+        type: integer
+      page_cursor:
+        type: string
+        x-stainless-pagination-property:
+          purpose: next_cursor_param
+    response:
+      data:
+        type: array
+        x-stainless-pagination-property:
+          purpose: items
+        items:
+          type: object
+          additionalProperties: true
+      next_page:
+        type: string
+        x-stainless-pagination-property:
+          purpose: next_cursor_field
 
 streaming:
   on_event:
-  - data_starts_with: "[DONE]"
-    handle: done
-  - event_type: error
-    handle: error
-  - event_type:
-    handle: yield
+    - data_starts_with: "[DONE]"
+      handle: done
+    - event_type: error
+      handle: error
+    - event_type:
+      handle: yield
 
 readme:
   example_requests:
@@ -738,7 +738,6 @@ resources:
       tool_calling_metadata_content: ToolCallingMetadataContent
       backend_format: BackendFormat
       finetuning_type: FinetuningType
-      inference_params: InferenceParams
   iam:
     standalone_api: true
     subresources:

From 167f5c69652951ed3646be548d7b3e6c9ba43085 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Thu, 11 Jun 2026 17:25:16 -0700
Subject: [PATCH 08/10] final lint

Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 docs/cli/reference.mdx                        | 213 ++++++++++
 .../cli/commands/api/inference/__init__.py    |   2 +
 .../cli/commands/api/inference/prompts.py     | 369 ++++++++++++++++++
 .../cli/commands/api/inference/__init__.py    |   2 +
 .../cli/commands/api/inference/prompts.py     | 369 ++++++++++++++++++
 5 files changed, 955 insertions(+)
 create mode 100644 packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/prompts.py
 create mode 100644 sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/prompts.py

diff --git a/docs/cli/reference.mdx b/docs/cli/reference.mdx
index ed519d408b..42011f745e 100644
--- a/docs/cli/reference.mdx
+++ b/docs/cli/reference.mdx
@@ -1391,6 +1391,7 @@ nemo inference [OPTIONS] COMMAND [ARGS]...
 * `deployments`: Manage deployments
 * `gateway`: Gateway operations
 * `models`: Manage models
+* `prompts`: Manage prompts
 * `providers`: Manage providers
 * `virtual-models`: Manage virtual_models
 
@@ -2856,6 +2857,218 @@ nemo inference models list [OPTIONS]
 * `--no-truncate`: Don't truncate long values in table/markdown/csv output.
 * `--output-columns, -c`: Columns to display: 'default', 'all', or comma-separated names. Only affects table/csv/markdown formats.
 
+#### nemo inference prompts
+
+Manage prompts
+
+**Usage:**
+
+```shell
+nemo inference prompts [OPTIONS] COMMAND [ARGS]...
+```
+
+**Help:**
+
+* `--help, -h`: Show this message and exit.
+
+**Commands:**
+
+* `create`: Create a new prompt.
+* `delete`: Delete a prompt by workspace and name.
+* `list`: List prompts for a specific workspace.
+* `get`: Get a prompt by workspace and name.
+* `update`: Update an existing prompt (full replacement of mutable...
+
+##### nemo inference prompts create
+
+Create a new prompt.
+
+**Required fields:** name
+
+**Examples:**
+
+```shell
+nemo inference prompts create <name> --input-file config.json
+nemo inference prompts create <name> --input-data '{"name": "value"}'
+echo '{"json": "data"}' | nemo inference prompts create <name> --input-file -
+nemo inference prompts create <name> --<option> "value"
+```
+
+**Usage:**
+
+```shell
+nemo inference prompts create [OPTIONS] [NAME]
+```
+
+**Arguments:**
+
+* `<NAME>`: Name of the prompt.
+
+**Options:**
+
+* `--workspace`
+* `--description`
+* `--inference-params`: Parameters for model inference. Extra fields can be supplied for additional options applied to the inference request directly. Fields not supported by the model may cause inference errors during evaluation. (JSON string)
+* `--input-variables`: Can be repeated for multiple values
+* `--messages`: JSON string
+* `--project`: The URN of the project associated with this prompt.
+* `--response-format`: JSON string
+* `--tags`: Can be repeated for multiple values
+* `--tool-choice`: JSON string
+* `--tools`: JSON string
+* `--exist-ok`: Do not raise an error if the resource already exists. Returns the existing resource.
+
+**Help:**
+
+* `--help, -h`: Show this message and exit.
+
+**Input Options:**
+
+* `--input-file`: Path to JSON file (use '-' for stdin)
+* `--input-data`: Input data for the request (JSON or YAML)
+
+**Output Options:**
+
+* `--output-format, -f <CHOICE>`: Output format for an entity. [possible values: json, yaml, raw, code]
+
+##### nemo inference prompts delete
+
+Delete a prompt by workspace and name.
+
+**Usage:**
+
+```shell
+nemo inference prompts delete [OPTIONS] NAME
+```
+
+**Arguments:**
+
+* `<NAME>`
+
+**Options:**
+
+* `--workspace`
+
+**Help:**
+
+* `--help, -h`: Show this message and exit.
+
+##### nemo inference prompts list
+
+List prompts for a specific workspace.
+
+**Usage:**
+
+```shell
+nemo inference prompts list [OPTIONS]
+```
+
+**Options:**
+
+* `--workspace`
+* `--page <INTEGER>`: Page number.
+* `--page-size <INTEGER>`: Page size.
+* `--sort <CHOICE>`: The field to sort by. To sort in decreasing order, use `-` in front of the field name. [possible values: name, -name, created_at, -created_at, updated_at, -updated_at]
+* `--all-pages`: Fetch all pages
+
+**Filter Options:**
+
+* `--filter FILTER_JSON`: Use --filter with JSON for complex/nested queries, or --filter.FIELD options for simple fields. Both can be combined, with field options taking precedence.
+JSON-only fields:
+  created_at: \{gte: str, lte: str}
+  updated_at: \{gte: str, lte: str}
+
+Filter prompts by workspace, project, name, description, created_at, and updated_at.
+* `--filter.description`
+* `--filter.name`
+* `--filter.project`
+* `--filter.workspace`
+
+**Help:**
+
+* `--help, -h`: Show this message and exit.
+
+**Output Options:**
+
+* `--output-format, -f <CHOICE>`: Output format for the list of results. [possible values: table, json, yaml, markdown, csv, raw, code]
+* `--no-truncate`: Don't truncate long values in table/markdown/csv output.
+* `--output-columns, -c`: Columns to display: 'default', 'all', or comma-separated names. Only affects table/csv/markdown formats.
+
+##### nemo inference prompts get
+
+Get a prompt by workspace and name.
+
+**Usage:**
+
+```shell
+nemo inference prompts get [OPTIONS] NAME
+```
+
+**Arguments:**
+
+* `<NAME>`
+
+**Options:**
+
+* `--workspace`
+
+**Help:**
+
+* `--help, -h`: Show this message and exit.
+
+**Output Options:**
+
+* `--output-format, -f <CHOICE>`: Output format for an entity. [possible values: json, yaml, raw, code]
+
+##### nemo inference prompts update
+
+Update an existing prompt (full replacement of mutable fields).
+
+**Examples:**
+
+```shell
+nemo inference prompts update <name> --input-file config.json
+nemo inference prompts update <name> --input-data '{"field": "value"}'
+echo '{"json": "data"}' | nemo inference prompts update <name> --input-file -
+nemo inference prompts update <name> --<option> "value"
+```
+
+**Usage:**
+
+```shell
+nemo inference prompts update [OPTIONS] NAME
+```
+
+**Arguments:**
+
+* `<NAME>`
+
+**Options:**
+
+* `--workspace`
+* `--description`
+* `--inference-params`: Parameters for model inference. Extra fields can be supplied for additional options applied to the inference request directly. Fields not supported by the model may cause inference errors during evaluation. (JSON string)
+* `--input-variables`: Can be repeated for multiple values
+* `--messages`: JSON string
+* `--project`: The URN of the project associated with this prompt.
+* `--response-format`: JSON string
+* `--tags`: Can be repeated for multiple values
+* `--tool-choice`: JSON string
+* `--tools`: JSON string
+
+**Help:**
+
+* `--help, -h`: Show this message and exit.
+
+**Input Options:**
+
+* `--input-file`: Path to JSON file (use '-' for stdin)
+* `--input-data`: Input data for the request (JSON or YAML)
+
+**Output Options:**
+
+* `--output-format, -f <CHOICE>`: Output format for an entity. [possible values: json, yaml, raw, code]
+
 #### nemo inference providers
 
 Manage providers
diff --git a/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/__init__.py b/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/__init__.py
index f8b19e7098..188af3af81 100644
--- a/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/__init__.py
+++ b/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/__init__.py
@@ -13,6 +13,7 @@
     deployments,
     gateway,
     models,
+    prompts,
     providers,
     virtual_models,
 )
@@ -26,6 +27,7 @@
 app.add_typer(deployments.app, name="deployments")
 app.add_typer(gateway.app, name="gateway")
 app.add_typer(models.app, name="models")
+app.add_typer(prompts.app, name="prompts")
 app.add_typer(providers.app, name="providers")
 app.add_typer(virtual_models.app, name="virtual-models")
 
diff --git a/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/prompts.py b/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/prompts.py
new file mode 100644
index 0000000000..27a4a9daae
--- /dev/null
+++ b/packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/prompts.py
@@ -0,0 +1,369 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# NOTE: This file is auto-generated
+from __future__ import annotations
+
+from typing import Annotated, Literal
+
+import typer
+
+from nemo_platform_ext.cli.core.api import build_kwargs, merge_filter_dict
+from nemo_platform_ext.cli.core.code_generator import handle_code_generation
+from nemo_platform_ext.cli.core.context import CLIContext
+from nemo_platform_ext.cli.core.errors import handle_errors
+from nemo_platform_ext.cli.core.formatters import Column, check_output_columns_with_format, format_output
+from nemo_platform_ext.cli.core.help_formatter import collect_warnings, create_typer_app
+from nemo_platform_ext.cli.core.pagination import PaginationType, fetch_all_pages, warn_if_more_pages
+from nemo_platform_ext.cli.core.stdin_utils import read_data_input_with_flags, read_payload, validate_required_fields
+from nemo_platform_ext.cli.core.types import (
+    EntityOutputFormatOption,
+    ListOutputFormatOption,
+    NoTruncateOption,
+    OutputColumnsOption,
+)
+
+app = create_typer_app(name="prompts", help="Manage prompts")
+
+
+@app.command("create")
+@collect_warnings
+@handle_errors
+def create_prompts(
+    ctx: typer.Context,
+    name: Annotated[str | None, typer.Argument(help="Name of the prompt. (required)")] = None,
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    description: Annotated[str | None, typer.Option("--description")] = None,
+    inference_params: Annotated[
+        str | None,
+        typer.Option(
+            "--inference-params",
+            help="Parameters for model inference. Extra fields can be supplied for additional options applied to the inference request directly. Fields not supported by the model may cause inference errors during evaluation. (JSON string)",
+        ),
+    ] = None,
+    input_variables: Annotated[
+        list[str] | None, typer.Option("--input-variables", help="Can be repeated for multiple values")
+    ] = None,
+    messages: Annotated[str | None, typer.Option("--messages", help="JSON string")] = None,
+    project: Annotated[
+        str | None, typer.Option("--project", help="The URN of the project associated with this prompt.")
+    ] = None,
+    response_format: Annotated[str | None, typer.Option("--response-format", help="JSON string")] = None,
+    tags: Annotated[list[str] | None, typer.Option("--tags", help="Can be repeated for multiple values")] = None,
+    tool_choice: Annotated[str | None, typer.Option("--tool-choice", help="JSON string")] = None,
+    tools: Annotated[str | None, typer.Option("--tools", help="JSON string")] = None,
+    exist_ok: Annotated[
+        bool | None,
+        typer.Option(
+            "--exist-ok", help="Do not raise an error if the resource already exists. Returns the existing resource."
+        ),
+    ] = None,
+    input_file: Annotated[
+        str | None,
+        typer.Option("--input-file", help="Path to JSON file (use '-' for stdin)", rich_help_panel="Input Options"),
+    ] = None,
+    input_data: Annotated[
+        str | None,
+        typer.Option("--input-data", help="Input data for the request (JSON or YAML)", rich_help_panel="Input Options"),
+    ] = None,
+    output_format: EntityOutputFormatOption = None,
+) -> None:
+    """Create a new prompt.
+
+    [bold red]Required fields:[/] name
+
+    [green]Examples:[/]
+    nemo inference prompts create <name> --input-file config.json
+    nemo inference prompts create <name> --input-data '{"name": "value"}'
+    echo '{"json": "data"}' | nemo inference prompts create <name> --input-file -
+    nemo inference prompts create <name> --<option> "value"
+    """
+    # Base payload comes from --input-file/--input-data; start empty when neither is given.
+    if input_file or input_data:
+        input_payload = read_data_input_with_flags(input_file=input_file, input_data=input_data)
+    else:
+        input_payload = {}
+
+    # Overlay CLI flags on the payload: any flag that was passed replaces the same key from the input.
+    if workspace is not None:
+        input_payload["workspace"] = workspace
+    if name is not None:
+        input_payload["name"] = name
+    if description is not None:
+        input_payload["description"] = description
+    if inference_params is not None:
+        input_payload["inference_params"] = read_payload("inference_params", inference_params)
+    if input_variables:  # None or an empty list leaves the payload value as-is
+        input_payload["input_variables"] = input_variables
+    if messages is not None:
+        input_payload["messages"] = read_payload("messages", messages)
+    if project is not None:
+        input_payload["project"] = project
+    if response_format is not None:
+        input_payload["response_format"] = read_payload("response_format", response_format)
+    if tags:  # None or an empty list leaves the payload value as-is
+        input_payload["tags"] = tags
+    if tool_choice is not None:
+        input_payload["tool_choice"] = read_payload("tool_choice", tool_choice)
+    if tools is not None:
+        input_payload["tools"] = read_payload("tools", tools)
+    if exist_ok is not None:
+        input_payload["exist_ok"] = exist_ok
+    # "name" may come from the argument or from the payload, so it is checked only after merging.
+    validate_required_fields(
+        input_payload,
+        ["name"],
+        "inference prompts create",
+        {
+            "name": "Name of the prompt. (required)",
+        },
+    )
+
+    all_kwargs = input_payload
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    if handle_code_generation(["inference", "prompts"], "create", all_kwargs, output_format, state):
+        return  # truthy result: stop here, no API call is made
+
+    client = state.get_client()
+    result = client.inference.prompts.create(**all_kwargs)
+
+    format_output(
+        result,
+        is_list=False,
+        output_format=output_format,
+        no_truncate=state.get_no_truncate(),
+        timestamp_format=state.get_timestamp_format(),
+    )
+
+
+@app.command("delete")
+@collect_warnings
+@handle_errors
+def delete_prompts(
+    ctx: typer.Context,
+    name: Annotated[str, typer.Argument()],
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+) -> None:
+    """Delete a prompt by workspace and name."""
+    state: CLIContext = ctx.obj
+    client = state.get_client()
+
+    kwargs = build_kwargs(  # NOTE(review): presumably omits unset (None) values — confirm in build_kwargs
+        workspace=workspace,
+    )
+    client.inference.prompts.delete(name, **kwargs)  # return value is not used
+
+    typer.echo("✓ Deleted successfully")
+
+
+@app.command("list")
+@collect_warnings
+@handle_errors
+def list_prompts(
+    ctx: typer.Context,
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    filter: Annotated[
+        str | None,
+        typer.Option(
+            "--filter",
+            metavar="FILTER_JSON",
+            help="Use --filter with JSON for complex/nested queries, or --filter.FIELD options for simple fields. Both can be combined, with field options taking precedence.\nJSON-only fields:\n  created_at: {gte: str, lte: str}\n  updated_at: {gte: str, lte: str}\n\nFilter prompts by workspace, project, name, description, created_at, and updated_at.",
+            rich_help_panel="Filter Options",
+        ),
+    ] = None,
+    filter_description: Annotated[
+        str | None, typer.Option("--filter.description", rich_help_panel="Filter Options")
+    ] = None,
+    filter_name: Annotated[str | None, typer.Option("--filter.name", rich_help_panel="Filter Options")] = None,
+    filter_project: Annotated[str | None, typer.Option("--filter.project", rich_help_panel="Filter Options")] = None,
+    filter_workspace: Annotated[
+        str | None, typer.Option("--filter.workspace", rich_help_panel="Filter Options")
+    ] = None,
+    page: Annotated[int | None, typer.Option("--page", help="Page number.")] = None,
+    page_size: Annotated[int | None, typer.Option("--page-size", help="Page size.")] = None,
+    sort: Annotated[
+        Literal["name", "-name", "created_at", "-created_at", "updated_at", "-updated_at"] | None,
+        typer.Option(
+            "--sort", help="The field to sort by. To sort in decreasing order, use `-` in front of the field name."
+        ),
+    ] = None,
+    output_format: ListOutputFormatOption = None,
+    no_truncate: NoTruncateOption = None,
+    columns: OutputColumnsOption = None,
+    all_pages: Annotated[bool, typer.Option("--all-pages", help="Fetch all pages")] = False,
+) -> None:
+    """List prompts for a specific workspace."""
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    check_output_columns_with_format(columns, output_format)
+
+    default_columns = [  # used when no columns are requested or the value is "default"
+        Column("name", None),
+        Column("workspace", None),
+        Column("created_at", None),
+    ]
+    if columns is None or str(columns).strip() == "default":
+        columns = default_columns
+
+    kwargs = build_kwargs(
+        workspace=workspace,
+        filter=merge_filter_dict(
+            filter, description=filter_description, name=filter_name, project=filter_project, workspace=filter_workspace
+        ),
+        page=page,
+        page_size=page_size,
+        sort=sort,
+    )
+
+    if handle_code_generation(["inference", "prompts"], "list", kwargs, output_format, state):
+        return  # truthy result: stop here, no API call is made
+
+    client = state.get_client()
+    path_args = ()  # list() is called without positional arguments
+    pagination_type = PaginationType.PAGE_NUMBER
+    if all_pages:
+        items = fetch_all_pages(
+            client.inference.prompts.list,
+            path_args=path_args,
+            body_args=kwargs,
+            pagination_type=pagination_type,
+        )
+    else:
+        items = client.inference.prompts.list(*path_args, **kwargs)
+
+    format_output(
+        items,
+        is_list=True,
+        output_format=output_format,
+        output_columns=columns,
+        no_truncate=state.get_no_truncate(no_truncate),
+        timestamp_format=state.get_timestamp_format(),
+    )
+    if not all_pages:  # only one page was fetched
+        warn_if_more_pages(items, pagination_type)
+
+
+@app.command("get")
+@collect_warnings
+@handle_errors
+def retrieve_prompts(
+    ctx: typer.Context,
+    name: Annotated[str, typer.Argument()],
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    output_format: EntityOutputFormatOption = None,
+) -> None:
+    """Get a prompt by workspace and name."""
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    kwargs = build_kwargs(  # NOTE(review): `name` is not in kwargs, so code generation never receives it — confirm
+        workspace=workspace,
+    )
+    if handle_code_generation(["inference", "prompts"], "retrieve", kwargs, output_format, state):
+        return  # truthy result: stop here, no API call is made
+
+    client = state.get_client()
+    result = client.inference.prompts.retrieve(name, **kwargs)
+
+    format_output(
+        result,
+        is_list=False,
+        output_format=output_format,
+        no_truncate=state.get_no_truncate(),
+        timestamp_format=state.get_timestamp_format(),
+    )
+
+
+@app.command("update")
+@collect_warnings
+@handle_errors
+def update_prompts(
+    ctx: typer.Context,
+    name: Annotated[str, typer.Argument()],
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    description: Annotated[str | None, typer.Option("--description")] = None,
+    inference_params: Annotated[
+        str | None,
+        typer.Option(
+            "--inference-params",
+            help="Parameters for model inference. Extra fields can be supplied for additional options applied to the inference request directly. Fields not supported by the model may cause inference errors during evaluation. (JSON string)",
+        ),
+    ] = None,
+    input_variables: Annotated[
+        list[str] | None, typer.Option("--input-variables", help="Can be repeated for multiple values")
+    ] = None,
+    messages: Annotated[str | None, typer.Option("--messages", help="JSON string")] = None,
+    project: Annotated[
+        str | None, typer.Option("--project", help="The URN of the project associated with this prompt.")
+    ] = None,
+    response_format: Annotated[str | None, typer.Option("--response-format", help="JSON string")] = None,
+    tags: Annotated[list[str] | None, typer.Option("--tags", help="Can be repeated for multiple values")] = None,
+    tool_choice: Annotated[str | None, typer.Option("--tool-choice", help="JSON string")] = None,
+    tools: Annotated[str | None, typer.Option("--tools", help="JSON string")] = None,
+    input_file: Annotated[
+        str | None,
+        typer.Option("--input-file", help="Path to JSON file (use '-' for stdin)", rich_help_panel="Input Options"),
+    ] = None,
+    input_data: Annotated[
+        str | None,
+        typer.Option("--input-data", help="Input data for the request (JSON or YAML)", rich_help_panel="Input Options"),
+    ] = None,
+    output_format: EntityOutputFormatOption = None,
+) -> None:
+    """Update an existing prompt (full replacement of mutable fields).
+
+    [green]Examples:[/]
+    nemo inference prompts update <name> --input-file config.json
+    nemo inference prompts update <name> --input-data '{"field": "value"}'
+    echo '{"json": "data"}' | nemo inference prompts update <name> --input-file -
+    nemo inference prompts update <name> --<option> "value"
+    """
+    # Base payload comes from --input-file/--input-data; start empty when neither is given.
+    if input_file or input_data:
+        input_payload = read_data_input_with_flags(input_file=input_file, input_data=input_data)
+    else:
+        input_payload = {}
+
+    # Overlay CLI flags on the payload: any flag that was passed replaces the same key from the input.
+    if workspace is not None:
+        input_payload["workspace"] = workspace
+    if description is not None:
+        input_payload["description"] = description
+    if inference_params is not None:
+        input_payload["inference_params"] = read_payload("inference_params", inference_params)
+    if input_variables:  # None or an empty list leaves the payload value as-is
+        input_payload["input_variables"] = input_variables
+    if messages is not None:
+        input_payload["messages"] = read_payload("messages", messages)
+    if project is not None:
+        input_payload["project"] = project
+    if response_format is not None:
+        input_payload["response_format"] = read_payload("response_format", response_format)
+    if tags:  # None or an empty list leaves the payload value as-is
+        input_payload["tags"] = tags
+    if tool_choice is not None:
+        input_payload["tool_choice"] = read_payload("tool_choice", tool_choice)
+    if tools is not None:
+        input_payload["tools"] = read_payload("tools", tools)
+    # NAME selects the prompt to update, so a "name" key in the payload must never override the argument.
+    all_kwargs = {"name": name, **{k: v for k, v in input_payload.items() if k != "name"}}
+
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    if handle_code_generation(["inference", "prompts"], "update", all_kwargs, output_format, state):
+        return  # truthy result: stop here, no API call is made
+
+    client = state.get_client()
+    result = client.inference.prompts.update(**all_kwargs)
+
+    format_output(
+        result,
+        is_list=False,
+        output_format=output_format,
+        no_truncate=state.get_no_truncate(),
+        timestamp_format=state.get_timestamp_format(),
+    )
diff --git a/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/__init__.py
index d90491be36..de8284384c 100644
--- a/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/__init__.py
+++ b/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/__init__.py
@@ -13,6 +13,7 @@
     deployments,
     gateway,
     models,
+    prompts,
     providers,
     virtual_models,
 )
@@ -26,6 +27,7 @@
 app.add_typer(deployments.app, name="deployments")
 app.add_typer(gateway.app, name="gateway")
 app.add_typer(models.app, name="models")
+app.add_typer(prompts.app, name="prompts")
 app.add_typer(providers.app, name="providers")
 app.add_typer(virtual_models.app, name="virtual-models")
 
diff --git a/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/prompts.py b/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/prompts.py
new file mode 100644
index 0000000000..3b0324df87
--- /dev/null
+++ b/sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/prompts.py
@@ -0,0 +1,369 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# NOTE: This file is auto-generated
+from __future__ import annotations
+
+from typing import Annotated, Literal
+
+import typer
+
+from nemo_platform.cli.core.api import build_kwargs, merge_filter_dict
+from nemo_platform.cli.core.code_generator import handle_code_generation
+from nemo_platform.cli.core.context import CLIContext
+from nemo_platform.cli.core.errors import handle_errors
+from nemo_platform.cli.core.formatters import Column, check_output_columns_with_format, format_output
+from nemo_platform.cli.core.help_formatter import collect_warnings, create_typer_app
+from nemo_platform.cli.core.pagination import PaginationType, fetch_all_pages, warn_if_more_pages
+from nemo_platform.cli.core.stdin_utils import read_data_input_with_flags, read_payload, validate_required_fields
+from nemo_platform.cli.core.types import (
+    EntityOutputFormatOption,
+    ListOutputFormatOption,
+    NoTruncateOption,
+    OutputColumnsOption,
+)
+
+app = create_typer_app(name="prompts", help="Manage prompts")
+
+
+@app.command("create")
+@collect_warnings
+@handle_errors
+def create_prompts(
+    ctx: typer.Context,
+    name: Annotated[str | None, typer.Argument(help="Name of the prompt. (required)")] = None,
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    description: Annotated[str | None, typer.Option("--description")] = None,
+    inference_params: Annotated[
+        str | None,
+        typer.Option(
+            "--inference-params",
+            help="Parameters for model inference. Extra fields can be supplied for additional options applied to the inference request directly. Fields not supported by the model may cause inference errors during evaluation. (JSON string)",
+        ),
+    ] = None,
+    input_variables: Annotated[
+        list[str] | None, typer.Option("--input-variables", help="Can be repeated for multiple values")
+    ] = None,
+    messages: Annotated[str | None, typer.Option("--messages", help="JSON string")] = None,
+    project: Annotated[
+        str | None, typer.Option("--project", help="The URN of the project associated with this prompt.")
+    ] = None,
+    response_format: Annotated[str | None, typer.Option("--response-format", help="JSON string")] = None,
+    tags: Annotated[list[str] | None, typer.Option("--tags", help="Can be repeated for multiple values")] = None,
+    tool_choice: Annotated[str | None, typer.Option("--tool-choice", help="JSON string")] = None,
+    tools: Annotated[str | None, typer.Option("--tools", help="JSON string")] = None,
+    exist_ok: Annotated[
+        bool | None,
+        typer.Option(
+            "--exist-ok", help="Do not raise an error if the resource already exists. Returns the existing resource."
+        ),
+    ] = None,
+    input_file: Annotated[
+        str | None,
+        typer.Option("--input-file", help="Path to JSON file (use '-' for stdin)", rich_help_panel="Input Options"),
+    ] = None,
+    input_data: Annotated[
+        str | None,
+        typer.Option("--input-data", help="Input data for the request (JSON or YAML)", rich_help_panel="Input Options"),
+    ] = None,
+    output_format: EntityOutputFormatOption = None,
+) -> None:
+    """Create a new prompt.
+
+    [bold red]Required fields:[/] name
+
+    [green]Examples:[/]
+    nemo inference prompts create <name> --input-file config.json
+    nemo inference prompts create <name> --input-data '{"name": "value"}'
+    echo '{"json": "data"}' | nemo inference prompts create <name> --input-file -
+    nemo inference prompts create <name> --<option> "value"
+    """
+    # Start from the --input-file / --input-data payload when either is given, else from an empty dict
+    if input_file or input_data:
+        input_payload = read_data_input_with_flags(input_file=input_file, input_data=input_data)
+    else:
+        input_payload = {}
+
+    # Overlay CLI flags on the payload: a flag left at None keeps whatever the payload already holds
+    if workspace is not None:
+        input_payload["workspace"] = workspace
+    if name is not None:
+        input_payload["name"] = name
+    if description is not None:
+        input_payload["description"] = description
+    if inference_params is not None:
+        input_payload["inference_params"] = read_payload("inference_params", inference_params)
+    if input_variables:  # falsy check: a missing or empty list leaves the payload value untouched
+        input_payload["input_variables"] = input_variables
+    if messages is not None:
+        input_payload["messages"] = read_payload("messages", messages)
+    if project is not None:
+        input_payload["project"] = project
+    if response_format is not None:
+        input_payload["response_format"] = read_payload("response_format", response_format)
+    if tags:  # falsy check: a missing or empty list leaves the payload value untouched
+        input_payload["tags"] = tags
+    if tool_choice is not None:
+        input_payload["tool_choice"] = read_payload("tool_choice", tool_choice)
+    if tools is not None:
+        input_payload["tools"] = read_payload("tools", tools)
+    if exist_ok is not None:
+        input_payload["exist_ok"] = exist_ok
+    # Pass the merged payload (file/data plus flags) to validate_required_fields; "name" is the only required key
+    validate_required_fields(
+        input_payload,
+        ["name"],
+        "inference prompts create",
+        {
+            "name": "Name of the prompt. (required)",
+        },
+    )
+
+    all_kwargs = input_payload  # same dict object, not a copy
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    if handle_code_generation(["inference", "prompts"], "create", all_kwargs, output_format, state):
+        return  # truthy result from handle_code_generation: skip the API call
+
+    client = state.get_client()
+    result = client.inference.prompts.create(**all_kwargs)
+
+    format_output(
+        result,
+        is_list=False,
+        output_format=output_format,
+        no_truncate=state.get_no_truncate(),
+        timestamp_format=state.get_timestamp_format(),
+    )
+
+
+@app.command("delete")
+@collect_warnings
+@handle_errors
+def delete_prompts(
+    ctx: typer.Context,
+    name: Annotated[str, typer.Argument()],
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+) -> None:
+    """Delete a prompt by workspace and name."""
+    state: CLIContext = ctx.obj
+    client = state.get_client()  # NOTE(review): no handle_code_generation branch here, unlike the other commands
+
+    kwargs = build_kwargs(
+        workspace=workspace,
+    )
+    client.inference.prompts.delete(name, **kwargs)  # the call's return value is discarded
+
+    typer.echo("✓ Deleted successfully")
+
+
+@app.command("list")
+@collect_warnings
+@handle_errors
+def list_prompts(
+    ctx: typer.Context,
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    filter: Annotated[
+        str | None,
+        typer.Option(
+            "--filter",
+            metavar="FILTER_JSON",
+            help="Use --filter with JSON for complex/nested queries, or --filter.FIELD options for simple fields. Both can be combined, with field options taking precedence.\nJSON-only fields:\n  created_at: {gte: str, lte: str}\n  updated_at: {gte: str, lte: str}\n\nFilter prompts by workspace, project, name, description, created_at, and updated_at.",
+            rich_help_panel="Filter Options",
+        ),
+    ] = None,
+    filter_description: Annotated[
+        str | None, typer.Option("--filter.description", rich_help_panel="Filter Options")
+    ] = None,
+    filter_name: Annotated[str | None, typer.Option("--filter.name", rich_help_panel="Filter Options")] = None,
+    filter_project: Annotated[str | None, typer.Option("--filter.project", rich_help_panel="Filter Options")] = None,
+    filter_workspace: Annotated[
+        str | None, typer.Option("--filter.workspace", rich_help_panel="Filter Options")
+    ] = None,
+    page: Annotated[int | None, typer.Option("--page", help="Page number.")] = None,
+    page_size: Annotated[int | None, typer.Option("--page-size", help="Page size.")] = None,
+    sort: Annotated[
+        Literal["name", "-name", "created_at", "-created_at", "updated_at", "-updated_at"] | None,
+        typer.Option(
+            "--sort", help="The field to sort by. To sort in decreasing order, use `-` in front of the field name."
+        ),
+    ] = None,
+    output_format: ListOutputFormatOption = None,
+    no_truncate: NoTruncateOption = None,
+    columns: OutputColumnsOption = None,
+    all_pages: Annotated[bool, typer.Option("--all-pages", help="Fetch all pages")] = False,
+) -> None:
+    """List prompts for a specific workspace."""
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    check_output_columns_with_format(columns, output_format)
+
+    default_columns = [
+        Column("name", None),
+        Column("workspace", None),
+        Column("created_at", None),
+    ]
+    if columns is None or str(columns).strip() == "default":  # use the three default columns
+        columns = default_columns
+
+    kwargs = build_kwargs(
+        workspace=workspace,
+        filter=merge_filter_dict(
+            filter, description=filter_description, name=filter_name, project=filter_project, workspace=filter_workspace
+        ),
+        page=page,
+        page_size=page_size,
+        sort=sort,
+    )
+
+    if handle_code_generation(["inference", "prompts"], "list", kwargs, output_format, state):
+        return  # truthy result from handle_code_generation: skip the API call
+
+    client = state.get_client()
+    path_args = ()  # empty: list is called with keyword arguments only
+    pagination_type = PaginationType.PAGE_NUMBER
+    if all_pages:  # --all-pages: let fetch_all_pages drive client.inference.prompts.list
+        items = fetch_all_pages(
+            client.inference.prompts.list,
+            path_args=path_args,
+            body_args=kwargs,
+            pagination_type=pagination_type,
+        )
+    else:
+        items = client.inference.prompts.list(*path_args, **kwargs)
+
+    format_output(
+        items,
+        is_list=True,
+        output_format=output_format,
+        output_columns=columns,
+        no_truncate=state.get_no_truncate(no_truncate),
+        timestamp_format=state.get_timestamp_format(),
+    )
+    if not all_pages:  # a single page was fetched, so pass it to warn_if_more_pages
+        warn_if_more_pages(items, pagination_type)
+
+
+@app.command("get")
+@collect_warnings
+@handle_errors
+def retrieve_prompts(
+    ctx: typer.Context,
+    name: Annotated[str, typer.Argument()],
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    output_format: EntityOutputFormatOption = None,
+) -> None:
+    """Get a prompt by workspace and name."""
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    kwargs = build_kwargs(  # NOTE(review): holds only workspace; `name` is not passed to code generation below
+        workspace=workspace,
+    )
+    if handle_code_generation(["inference", "prompts"], "retrieve", kwargs, output_format, state):
+        return  # truthy result from handle_code_generation: skip the API call
+
+    client = state.get_client()
+    result = client.inference.prompts.retrieve(name, **kwargs)  # name is positional, workspace goes via kwargs
+
+    format_output(
+        result,
+        is_list=False,
+        output_format=output_format,
+        no_truncate=state.get_no_truncate(),
+        timestamp_format=state.get_timestamp_format(),
+    )
+
+
+@app.command("update")
+@collect_warnings
+@handle_errors
+def update_prompts(
+    ctx: typer.Context,
+    name: Annotated[str, typer.Argument()],
+    workspace: Annotated[str | None, typer.Option("--workspace")] = None,
+    description: Annotated[str | None, typer.Option("--description")] = None,
+    inference_params: Annotated[
+        str | None,
+        typer.Option(
+            "--inference-params",
+            help="Parameters for model inference. Extra fields can be supplied for additional options applied to the inference request directly. Fields not supported by the model may cause inference errors during evaluation. (JSON string)",
+        ),
+    ] = None,
+    input_variables: Annotated[
+        list[str] | None, typer.Option("--input-variables", help="Can be repeated for multiple values")
+    ] = None,
+    messages: Annotated[str | None, typer.Option("--messages", help="JSON string")] = None,
+    project: Annotated[
+        str | None, typer.Option("--project", help="The URN of the project associated with this prompt.")
+    ] = None,
+    response_format: Annotated[str | None, typer.Option("--response-format", help="JSON string")] = None,
+    tags: Annotated[list[str] | None, typer.Option("--tags", help="Can be repeated for multiple values")] = None,
+    tool_choice: Annotated[str | None, typer.Option("--tool-choice", help="JSON string")] = None,
+    tools: Annotated[str | None, typer.Option("--tools", help="JSON string")] = None,
+    input_file: Annotated[
+        str | None,
+        typer.Option("--input-file", help="Path to JSON file (use '-' for stdin)", rich_help_panel="Input Options"),
+    ] = None,
+    input_data: Annotated[
+        str | None,
+        typer.Option("--input-data", help="Input data for the request (JSON or YAML)", rich_help_panel="Input Options"),
+    ] = None,
+    output_format: EntityOutputFormatOption = None,
+) -> None:
+    """Update an existing prompt (full replacement of mutable fields).
+
+    [green]Examples:[/]
+    nemo inference prompts update <name> --input-file config.json
+    nemo inference prompts update <name> --input-data '{"field": "value"}'
+    echo '{"json": "data"}' | nemo inference prompts update <name> --input-file -
+    nemo inference prompts update <name> --<option> "value"
+    """
+    # Start from the --input-file / --input-data payload when either is given, else from an empty dict
+    if input_file or input_data:
+        input_payload = read_data_input_with_flags(input_file=input_file, input_data=input_data)
+    else:
+        input_payload = {}
+
+    # Overlay CLI flags on the payload: a flag left at None keeps whatever the payload already holds
+    if workspace is not None:
+        input_payload["workspace"] = workspace
+    if description is not None:
+        input_payload["description"] = description
+    if inference_params is not None:
+        input_payload["inference_params"] = read_payload("inference_params", inference_params)
+    if input_variables:  # falsy check: a missing or empty list leaves the payload value untouched
+        input_payload["input_variables"] = input_variables
+    if messages is not None:
+        input_payload["messages"] = read_payload("messages", messages)
+    if project is not None:
+        input_payload["project"] = project
+    if response_format is not None:
+        input_payload["response_format"] = read_payload("response_format", response_format)
+    if tags:  # falsy check: a missing or empty list leaves the payload value untouched
+        input_payload["tags"] = tags
+    if tool_choice is not None:
+        input_payload["tool_choice"] = read_payload("tool_choice", tool_choice)
+    if tools is not None:
+        input_payload["tools"] = read_payload("tools", tools)
+    input_payload.pop("name", None)  # positional NAME must win; a payload "name" would retarget the update
+    all_kwargs = {"name": name, **input_payload}
+
+    state: CLIContext = ctx.obj
+    output_format = state.get_output_format(output_format)
+
+    if handle_code_generation(["inference", "prompts"], "update", all_kwargs, output_format, state):
+        return  # truthy result from handle_code_generation: skip the API call
+
+    client = state.get_client()
+    result = client.inference.prompts.update(**all_kwargs)
+
+    format_output(
+        result,
+        is_list=False,
+        output_format=output_format,
+        no_truncate=state.get_no_truncate(),
+        timestamp_format=state.get_timestamp_format(),
+    )

From 634e824a650dc04cb246e4d704d8e2dd02f2424c Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Thu, 11 Jun 2026 17:43:42 -0700
Subject: [PATCH 09/10] fix auth

Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 docs/auth/authorization/permissions-reference.mdx     |  3 +++
 .../auth/src/nmp/core/auth/assets/static-authz.yaml   | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/docs/auth/authorization/permissions-reference.mdx b/docs/auth/authorization/permissions-reference.mdx
index e46b8452e4..bfde3e70c6 100644
--- a/docs/auth/authorization/permissions-reference.mdx
+++ b/docs/auth/authorization/permissions-reference.mdx
@@ -89,6 +89,9 @@ PlatformAdmin is omitted — it bypasses permission checks entirely at the polic
 | `models.(create \\| update \\| delete)` | Create, update, delete models |  | ✓ | ✓ |
 | `models.adapters.(read \\| list)` | Read, list models adapters | ✓ | ✓ | ✓ |
 | `models.adapters.(create \\| update \\| delete)` | Create, update, delete models adapters |  | ✓ | ✓ |
+| `models.prompts.read` | Read model prompts | ✓ | ✓ | ✓ |
+| `models.prompts.(create \\| update \\| delete)` | Create, update, delete models prompts |  | ✓ | ✓ |
+| `models.prompts.list` | List model prompts |  |  |  |
 | `models.tool-call-plugin.set` | Whether this user can set tool_call_plugin on Models or Deployment Configs *(policy-enforced)* |  |  | ✓ |
 | `models.trust-remote-code.set` | Whether this user can set trust_remote_code on Models *(policy-enforced)* |  |  | ✓ |
 
diff --git a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml
index eb3a65c0ca..2b40eb7fb8 100644
--- a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml
+++ b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml
@@ -182,6 +182,17 @@ authz:
           description: "Update model adapters"
         delete:
           description: "Delete model adapters"
+      prompts:
+        read:
+          description: "Read model prompts"
+        list:
+          description: "List model prompts"
+        create:
+          description: "Create model prompts"
+        update:
+          description: "Update model prompts"
+        delete:
+          description: "Delete model prompts"
       create:
         description: "Create models"
       delete:

From cfb9f2280e5a5c7f0a8aa85881947804bbf62eb4 Mon Sep 17 00:00:00 2001
From: Sean Teramae <steramae@nvidia.com>
Date: Fri, 12 Jun 2026 09:46:27 -0700
Subject: [PATCH 10/10] fix(models): correct FilesetMetadata import path in
 model entity tests

FilesetMetadata lives in nemo_platform.types.files, not nemo_platform.types.shared.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Sean Teramae <steramae@nvidia.com>
---
 .../tests/integration/test_model_entity_service_integration.py | 3 +--
 .../core/models/tests/unit/test_model_entity_service_unit.py   | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/services/core/models/tests/integration/test_model_entity_service_integration.py b/services/core/models/tests/integration/test_model_entity_service_integration.py
index 6611a27e82..ab008ccbc8 100644
--- a/services/core/models/tests/integration/test_model_entity_service_integration.py
+++ b/services/core/models/tests/integration/test_model_entity_service_integration.py
@@ -8,8 +8,7 @@
 import pytest
 from nemo_platform import AsyncNeMoPlatform
 from nemo_platform.filesets import ListFilesResponse
-from nemo_platform.types.files import Fileset, FilesetFile, LocalStorageConfig
-from nemo_platform.types.shared import FilesetMetadata
+from nemo_platform.types.files import Fileset, FilesetFile, FilesetMetadata, LocalStorageConfig
 from nmp.common.api.filter import ComparisonOperation, FilterOperator, LogicalOperation
 from nmp.common.api.parsed_filter import ParsedFilter
 from nmp.common.entities.client import EntityClient
diff --git a/services/core/models/tests/unit/test_model_entity_service_unit.py b/services/core/models/tests/unit/test_model_entity_service_unit.py
index 63b7c264be..821525be83 100644
--- a/services/core/models/tests/unit/test_model_entity_service_unit.py
+++ b/services/core/models/tests/unit/test_model_entity_service_unit.py
@@ -14,11 +14,11 @@
 from nemo_platform.types.files import (
     Fileset,
     FilesetFile,
+    FilesetMetadata,
     HuggingfaceStorageConfig,
     LocalStorageConfig,
     NGCStorageConfig,
 )
-from nemo_platform.types.shared import FilesetMetadata
 from nmp.common.api.common import Page, PaginationData
 from nmp.common.api.filter import ComparisonOperation, FilterOperator, LogicalOperation
 from nmp.common.api.parsed_filter import ParsedFilter