From 80722f2dec7c392997ae0796490078a0a0c5bc56 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Tue, 9 Jun 2026 15:20:13 -0700 Subject: [PATCH 01/10] feat(models): Add prompt entity Signed-off-by: Sean Teramae --- openapi/ga/individual/platform.openapi.yaml | 560 ++++++++++++++++++ openapi/ga/openapi.yaml | 560 ++++++++++++++++++ openapi/openapi.yaml | 560 ++++++++++++++++++ sdk/stainless.yaml | 18 + .../src/nmp/core/models/api/dependencies.py | 8 + .../core/models/api/service/prompt_service.py | 175 ++++++ .../src/nmp/core/models/api/v2/prompts.py | 186 ++++++ .../models/src/nmp/core/models/entities.py | 53 ++ .../models/src/nmp/core/models/schemas.py | 212 ++++++- .../models/src/nmp/core/models/service.py | 7 +- .../models/tests/unit/api/test_prompts_api.py | 244 ++++++++ .../tests/unit/test_prompt_service_unit.py | 231 ++++++++ 12 files changed, 2812 insertions(+), 2 deletions(-) create mode 100644 services/core/models/src/nmp/core/models/api/service/prompt_service.py create mode 100644 services/core/models/src/nmp/core/models/api/v2/prompts.py create mode 100644 services/core/models/tests/unit/api/test_prompts_api.py create mode 100644 services/core/models/tests/unit/test_prompt_service_unit.py diff --git a/openapi/ga/individual/platform.openapi.yaml b/openapi/ga/individual/platform.openapi.yaml index b0ebfd2e13..96e7ade804 100644 --- a/openapi/ga/individual/platform.openapi.yaml +++ b/openapi/ga/individual/platform.openapi.yaml @@ -6579,6 +6579,202 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts: + get: + tags: + - Prompts + summary: List Prompts By Workspace + description: List prompts for a specific workspace. + operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: page + in: query + required: false + schema: + type: integer + description: Page number. + default: 1 + title: Page + description: Page number. + - name: page_size + in: query + required: false + schema: + type: integer + description: Page size. + default: 100 + title: Page Size + description: Page size. + - name: sort + in: query + required: false + schema: + allOf: + - $ref: '#/components/schemas/PromptSort' + description: The field to sort by. To sort in decreasing order, use `-` + in front of the field name. + default: created_at + description: The field to sort by. To sort in decreasing order, use `-` in + front of the field name. + - in: query + name: filter + style: deepObject + required: false + explode: true + schema: + $ref: '#/components/schemas/PromptFilter' + description: Filter prompts by workspace, project, name, description, created_at, + and updated_at. + responses: + '200': + description: Return prompts for a workspace + content: + application/json: + schema: + $ref: '#/components/schemas/PromptsPage' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + post: + tags: + - Prompts + summary: Create Prompt + description: Create a new prompt. + operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePromptRequest' + responses: + '201': + description: Create a new prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts/{name}: + get: + tags: + - Prompts + summary: Get Prompt + description: Get a prompt by workspace and name. + operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '200': + description: Return prompt details + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + put: + tags: + - Prompts + summary: Update Prompt + description: Update an existing prompt (full replacement of mutable fields). + operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdatePromptRequest' + responses: + '200': + description: Update an existing prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + delete: + tags: + - Prompts + summary: Delete Prompt + description: Delete a prompt by workspace and name. + operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '204': + description: Delete a prompt + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /apis/models/v2/workspaces/{workspace}/providers: get: tags: @@ -8204,6 +8400,24 @@ components: - role title: ChatCompletionSystemMessageParam description: System message parameter for chat completion. + ChatCompletionTool: + properties: + type: + type: string + const: function + title: Type + description: The type of the tool. Currently only 'function' is supported. + default: function + function: + allOf: + - $ref: '#/components/schemas/FunctionDefinition' + description: The function definition for this tool. + type: object + required: + - function + title: ChatCompletionTool + description: An OpenAI-compatible tool definition (currently always a function + tool). ChatCompletionToolMessageParam: properties: content: @@ -8910,6 +9124,65 @@ components: - source title: CreatePlatformJobRequest description: Request model for creating a new platform job. + CreatePromptRequest: + properties: + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z), + digits (0-9), underscores, hyphens, and dots.' + examples: + - support-bot-system + - summarizer + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + required: + - name + title: CreatePromptRequest + description: Request model for creating a Prompt. CreateVirtualModelRequest: properties: default_model_entity: @@ -10512,6 +10785,39 @@ components: - name title: FunctionCall description: Function call information. + FunctionDefinition: + properties: + name: + type: string + maxLength: 255 + title: Name + description: The name of the function to be called. + description: + title: Description + description: A description of what the function does, used by the model + to decide when and how to call it. + type: string + parameters: + title: Parameters + description: The parameters the function accepts, described as a JSON Schema + object. + additionalProperties: true + type: object + strict: + title: Strict + description: Whether to enforce strict schema adherence when generating + the function call. + type: boolean + type: object + required: + - name + title: FunctionDefinition + description: 'An OpenAI-compatible function definition for tool calling. + + + Mirrors the ``function`` object the Inference Gateway forwards to + + OpenAI-compatible backends.' GLiNERDetection: properties: server_endpoint: @@ -15064,6 +15370,110 @@ components: required: - data title: ProjectsPage + Prompt: + properties: + id: + type: string + title: Id + description: Unique identifier for the prompt. + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the entity. Name/workspace combo must be unique across + all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, + hyphens, and dots.' + examples: + - llama-3.1-8b + - my-custom-model + workspace: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Workspace + description: 'The workspace of the entity. Allowed characters: letters (a-z, + A-Z), digits (0-9), underscores, hyphens, and dots.' + project: + title: Project + description: The URN of the project associated with this entity. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + created_at: + type: string + format: date-time + title: Created At + description: The timestamp of model entity creation + updated_at: + type: string + format: date-time + title: Updated At + description: The timestamp of the last model entity update + description: + title: Description + description: Optional description of the prompt. + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + description: Ordered list of chat messages that make up the prompt. + input_variables: + items: + type: string + type: array + title: Input Variables + description: Names of the Jinja2 template variables the prompt expects. + tools: + title: Tools + description: Optional OpenAI-compatible tool definitions to send with the + prompt. + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + description: 'Controls which (if any) tool is called: ''none'', ''auto'', + ''required'', or a named-tool object.' + response_format: + title: Response Format + description: Optional OpenAI-compatible response_format, e.g. a json_schema + structured-output spec. + additionalProperties: true + type: object + inference_params: + allOf: + - $ref: '#/components/schemas/InferenceParams' + description: Optional default model and sampling parameters (temperature, + top_p, max_tokens, ...). + tags: + items: + type: string + type: array + title: Tags + description: Optional free-form tags for organizing prompts. + type: object + required: + - name + - workspace + - created_at + - updated_at + title: Prompt + description: 'A reusable, stored chat prompt. + + + A Prompt captures the messages, declared template variables, optional tool + + definitions, and default inference parameters needed to invoke a model + + through the Inference Gateway. The unique identifier is workspace/name.' PromptData: properties: system_prompt: @@ -15086,6 +15496,106 @@ components: type: object title: PromptData description: Configuration for prompt engineering. + PromptFilter: + additionalProperties: false + description: Filter for Prompt queries. + properties: + workspace: + description: Filter by workspace. + title: Workspace + type: string + project: + description: Filter by project URN. + title: Project + type: string + name: + description: Filter by name. + title: Name + type: string + description: + description: Filter by description. + title: Description + type: string + created_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by creation date. + updated_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by update date. + title: PromptFilter + type: object + PromptMessage: + properties: + role: + allOf: + - $ref: '#/components/schemas/PromptMessageRole' + description: The role of the message author. + content: + type: string + title: Content + description: Templated message content. May contain template variables. + type: object + required: + - role + - content + title: PromptMessage + description: 'A single templated message in a chat prompt. + + + ``content`` is a Jinja2 template body that may reference the prompt''s + + declared ``input_variables`` (e.g. ``{{ topic }}``).' + PromptMessageRole: + type: string + enum: + - system + - developer + - user + - assistant + title: PromptMessageRole + description: 'Role of a message author in a chat prompt. + + + Follows the OpenAI chat schema the Inference Gateway speaks + + (``/v1/chat/completions``).' + PromptSort: + type: string + enum: + - name + - -name + - created_at + - -created_at + - updated_at + - -updated_at + title: PromptSort + description: Sort fields for Prompt queries. + PromptsPage: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + pagination: + allOf: + - $ref: '#/components/schemas/PaginationData' + description: Pagination information. + sort: + title: Sort + description: The field on which the results are sorted. + type: string + filter: + title: Filter + description: Filtering information. + additionalProperties: true + type: object + type: object + required: + - data + title: PromptsPage RailStatus: properties: status: @@ -16870,6 +17380,56 @@ components: This endpoint supports partial updates for fields managed by Models Controller.' + UpdatePromptRequest: + properties: + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + title: UpdatePromptRequest + description: 'Request model for replacing a Prompt''s mutable fields (full update). + + + The prompt name and workspace come from the URL path and cannot be changed.' UpdateVirtualModelRequest: properties: default_model_entity: diff --git a/openapi/ga/openapi.yaml b/openapi/ga/openapi.yaml index b0ebfd2e13..96e7ade804 100644 --- a/openapi/ga/openapi.yaml +++ b/openapi/ga/openapi.yaml @@ -6579,6 +6579,202 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts: + get: + tags: + - Prompts + summary: List Prompts By Workspace + description: List prompts for a specific workspace. + operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: page + in: query + required: false + schema: + type: integer + description: Page number. + default: 1 + title: Page + description: Page number. + - name: page_size + in: query + required: false + schema: + type: integer + description: Page size. + default: 100 + title: Page Size + description: Page size. + - name: sort + in: query + required: false + schema: + allOf: + - $ref: '#/components/schemas/PromptSort' + description: The field to sort by. To sort in decreasing order, use `-` + in front of the field name. + default: created_at + description: The field to sort by. To sort in decreasing order, use `-` in + front of the field name. + - in: query + name: filter + style: deepObject + required: false + explode: true + schema: + $ref: '#/components/schemas/PromptFilter' + description: Filter prompts by workspace, project, name, description, created_at, + and updated_at. + responses: + '200': + description: Return prompts for a workspace + content: + application/json: + schema: + $ref: '#/components/schemas/PromptsPage' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + post: + tags: + - Prompts + summary: Create Prompt + description: Create a new prompt. + operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePromptRequest' + responses: + '201': + description: Create a new prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts/{name}: + get: + tags: + - Prompts + summary: Get Prompt + description: Get a prompt by workspace and name. + operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '200': + description: Return prompt details + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + put: + tags: + - Prompts + summary: Update Prompt + description: Update an existing prompt (full replacement of mutable fields). + operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdatePromptRequest' + responses: + '200': + description: Update an existing prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + delete: + tags: + - Prompts + summary: Delete Prompt + description: Delete a prompt by workspace and name. + operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '204': + description: Delete a prompt + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /apis/models/v2/workspaces/{workspace}/providers: get: tags: @@ -8204,6 +8400,24 @@ components: - role title: ChatCompletionSystemMessageParam description: System message parameter for chat completion. + ChatCompletionTool: + properties: + type: + type: string + const: function + title: Type + description: The type of the tool. Currently only 'function' is supported. + default: function + function: + allOf: + - $ref: '#/components/schemas/FunctionDefinition' + description: The function definition for this tool. + type: object + required: + - function + title: ChatCompletionTool + description: An OpenAI-compatible tool definition (currently always a function + tool). ChatCompletionToolMessageParam: properties: content: @@ -8910,6 +9124,65 @@ components: - source title: CreatePlatformJobRequest description: Request model for creating a new platform job. + CreatePromptRequest: + properties: + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z), + digits (0-9), underscores, hyphens, and dots.' + examples: + - support-bot-system + - summarizer + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + required: + - name + title: CreatePromptRequest + description: Request model for creating a Prompt. CreateVirtualModelRequest: properties: default_model_entity: @@ -10512,6 +10785,39 @@ components: - name title: FunctionCall description: Function call information. + FunctionDefinition: + properties: + name: + type: string + maxLength: 255 + title: Name + description: The name of the function to be called. + description: + title: Description + description: A description of what the function does, used by the model + to decide when and how to call it. + type: string + parameters: + title: Parameters + description: The parameters the function accepts, described as a JSON Schema + object. + additionalProperties: true + type: object + strict: + title: Strict + description: Whether to enforce strict schema adherence when generating + the function call. + type: boolean + type: object + required: + - name + title: FunctionDefinition + description: 'An OpenAI-compatible function definition for tool calling. + + + Mirrors the ``function`` object the Inference Gateway forwards to + + OpenAI-compatible backends.' GLiNERDetection: properties: server_endpoint: @@ -15064,6 +15370,110 @@ components: required: - data title: ProjectsPage + Prompt: + properties: + id: + type: string + title: Id + description: Unique identifier for the prompt. + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the entity. Name/workspace combo must be unique across + all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, + hyphens, and dots.' + examples: + - llama-3.1-8b + - my-custom-model + workspace: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Workspace + description: 'The workspace of the entity. Allowed characters: letters (a-z, + A-Z), digits (0-9), underscores, hyphens, and dots.' + project: + title: Project + description: The URN of the project associated with this entity. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + created_at: + type: string + format: date-time + title: Created At + description: The timestamp of model entity creation + updated_at: + type: string + format: date-time + title: Updated At + description: The timestamp of the last model entity update + description: + title: Description + description: Optional description of the prompt. + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + description: Ordered list of chat messages that make up the prompt. + input_variables: + items: + type: string + type: array + title: Input Variables + description: Names of the Jinja2 template variables the prompt expects. + tools: + title: Tools + description: Optional OpenAI-compatible tool definitions to send with the + prompt. + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + description: 'Controls which (if any) tool is called: ''none'', ''auto'', + ''required'', or a named-tool object.' + response_format: + title: Response Format + description: Optional OpenAI-compatible response_format, e.g. a json_schema + structured-output spec. + additionalProperties: true + type: object + inference_params: + allOf: + - $ref: '#/components/schemas/InferenceParams' + description: Optional default model and sampling parameters (temperature, + top_p, max_tokens, ...). + tags: + items: + type: string + type: array + title: Tags + description: Optional free-form tags for organizing prompts. + type: object + required: + - name + - workspace + - created_at + - updated_at + title: Prompt + description: 'A reusable, stored chat prompt. + + + A Prompt captures the messages, declared template variables, optional tool + + definitions, and default inference parameters needed to invoke a model + + through the Inference Gateway. The unique identifier is workspace/name.' PromptData: properties: system_prompt: @@ -15086,6 +15496,106 @@ components: type: object title: PromptData description: Configuration for prompt engineering. + PromptFilter: + additionalProperties: false + description: Filter for Prompt queries. + properties: + workspace: + description: Filter by workspace. + title: Workspace + type: string + project: + description: Filter by project URN. + title: Project + type: string + name: + description: Filter by name. + title: Name + type: string + description: + description: Filter by description. + title: Description + type: string + created_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by creation date. + updated_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by update date. + title: PromptFilter + type: object + PromptMessage: + properties: + role: + allOf: + - $ref: '#/components/schemas/PromptMessageRole' + description: The role of the message author. + content: + type: string + title: Content + description: Templated message content. May contain template variables. + type: object + required: + - role + - content + title: PromptMessage + description: 'A single templated message in a chat prompt. + + + ``content`` is a Jinja2 template body that may reference the prompt''s + + declared ``input_variables`` (e.g. ``{{ topic }}``).' + PromptMessageRole: + type: string + enum: + - system + - developer + - user + - assistant + title: PromptMessageRole + description: 'Role of a message author in a chat prompt. + + + Follows the OpenAI chat schema the Inference Gateway speaks + + (``/v1/chat/completions``).' + PromptSort: + type: string + enum: + - name + - -name + - created_at + - -created_at + - updated_at + - -updated_at + title: PromptSort + description: Sort fields for Prompt queries. + PromptsPage: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + pagination: + allOf: + - $ref: '#/components/schemas/PaginationData' + description: Pagination information. + sort: + title: Sort + description: The field on which the results are sorted. + type: string + filter: + title: Filter + description: Filtering information. + additionalProperties: true + type: object + type: object + required: + - data + title: PromptsPage RailStatus: properties: status: @@ -16870,6 +17380,56 @@ components: This endpoint supports partial updates for fields managed by Models Controller.' + UpdatePromptRequest: + properties: + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + title: UpdatePromptRequest + description: 'Request model for replacing a Prompt''s mutable fields (full update). + + + The prompt name and workspace come from the URL path and cannot be changed.' UpdateVirtualModelRequest: properties: default_model_entity: diff --git a/openapi/openapi.yaml b/openapi/openapi.yaml index b0ebfd2e13..96e7ade804 100644 --- a/openapi/openapi.yaml +++ b/openapi/openapi.yaml @@ -6579,6 +6579,202 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts: + get: + tags: + - Prompts + summary: List Prompts By Workspace + description: List prompts for a specific workspace. + operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: page + in: query + required: false + schema: + type: integer + description: Page number. + default: 1 + title: Page + description: Page number. + - name: page_size + in: query + required: false + schema: + type: integer + description: Page size. + default: 100 + title: Page Size + description: Page size. + - name: sort + in: query + required: false + schema: + allOf: + - $ref: '#/components/schemas/PromptSort' + description: The field to sort by. To sort in decreasing order, use `-` + in front of the field name. + default: created_at + description: The field to sort by. To sort in decreasing order, use `-` in + front of the field name. + - in: query + name: filter + style: deepObject + required: false + explode: true + schema: + $ref: '#/components/schemas/PromptFilter' + description: Filter prompts by workspace, project, name, description, created_at, + and updated_at. + responses: + '200': + description: Return prompts for a workspace + content: + application/json: + schema: + $ref: '#/components/schemas/PromptsPage' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + post: + tags: + - Prompts + summary: Create Prompt + description: Create a new prompt. + operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePromptRequest' + responses: + '201': + description: Create a new prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts/{name}: + get: + tags: + - Prompts + summary: Get Prompt + description: Get a prompt by workspace and name. + operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '200': + description: Return prompt details + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + put: + tags: + - Prompts + summary: Update Prompt + description: Update an existing prompt (full replacement of mutable fields). + operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdatePromptRequest' + responses: + '200': + description: Update an existing prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + delete: + tags: + - Prompts + summary: Delete Prompt + description: Delete a prompt by workspace and name. + operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '204': + description: Delete a prompt + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /apis/models/v2/workspaces/{workspace}/providers: get: tags: @@ -8204,6 +8400,24 @@ components: - role title: ChatCompletionSystemMessageParam description: System message parameter for chat completion. + ChatCompletionTool: + properties: + type: + type: string + const: function + title: Type + description: The type of the tool. Currently only 'function' is supported. + default: function + function: + allOf: + - $ref: '#/components/schemas/FunctionDefinition' + description: The function definition for this tool. + type: object + required: + - function + title: ChatCompletionTool + description: An OpenAI-compatible tool definition (currently always a function + tool). ChatCompletionToolMessageParam: properties: content: @@ -8910,6 +9124,65 @@ components: - source title: CreatePlatformJobRequest description: Request model for creating a new platform job. + CreatePromptRequest: + properties: + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z), + digits (0-9), underscores, hyphens, and dots.' + examples: + - support-bot-system + - summarizer + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + required: + - name + title: CreatePromptRequest + description: Request model for creating a Prompt. CreateVirtualModelRequest: properties: default_model_entity: @@ -10512,6 +10785,39 @@ components: - name title: FunctionCall description: Function call information. + FunctionDefinition: + properties: + name: + type: string + maxLength: 255 + title: Name + description: The name of the function to be called. + description: + title: Description + description: A description of what the function does, used by the model + to decide when and how to call it. + type: string + parameters: + title: Parameters + description: The parameters the function accepts, described as a JSON Schema + object. + additionalProperties: true + type: object + strict: + title: Strict + description: Whether to enforce strict schema adherence when generating + the function call. + type: boolean + type: object + required: + - name + title: FunctionDefinition + description: 'An OpenAI-compatible function definition for tool calling. + + + Mirrors the ``function`` object the Inference Gateway forwards to + + OpenAI-compatible backends.' GLiNERDetection: properties: server_endpoint: @@ -15064,6 +15370,110 @@ components: required: - data title: ProjectsPage + Prompt: + properties: + id: + type: string + title: Id + description: Unique identifier for the prompt. + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the entity. Name/workspace combo must be unique across + all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, + hyphens, and dots.' + examples: + - llama-3.1-8b + - my-custom-model + workspace: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Workspace + description: 'The workspace of the entity. Allowed characters: letters (a-z, + A-Z), digits (0-9), underscores, hyphens, and dots.' + project: + title: Project + description: The URN of the project associated with this entity. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + created_at: + type: string + format: date-time + title: Created At + description: The timestamp of model entity creation + updated_at: + type: string + format: date-time + title: Updated At + description: The timestamp of the last model entity update + description: + title: Description + description: Optional description of the prompt. + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + description: Ordered list of chat messages that make up the prompt. + input_variables: + items: + type: string + type: array + title: Input Variables + description: Names of the Jinja2 template variables the prompt expects. + tools: + title: Tools + description: Optional OpenAI-compatible tool definitions to send with the + prompt. + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + description: 'Controls which (if any) tool is called: ''none'', ''auto'', + ''required'', or a named-tool object.' + response_format: + title: Response Format + description: Optional OpenAI-compatible response_format, e.g. a json_schema + structured-output spec. + additionalProperties: true + type: object + inference_params: + allOf: + - $ref: '#/components/schemas/InferenceParams' + description: Optional default model and sampling parameters (temperature, + top_p, max_tokens, ...). + tags: + items: + type: string + type: array + title: Tags + description: Optional free-form tags for organizing prompts. + type: object + required: + - name + - workspace + - created_at + - updated_at + title: Prompt + description: 'A reusable, stored chat prompt. + + + A Prompt captures the messages, declared template variables, optional tool + + definitions, and default inference parameters needed to invoke a model + + through the Inference Gateway. The unique identifier is workspace/name.' PromptData: properties: system_prompt: @@ -15086,6 +15496,106 @@ components: type: object title: PromptData description: Configuration for prompt engineering. + PromptFilter: + additionalProperties: false + description: Filter for Prompt queries. + properties: + workspace: + description: Filter by workspace. + title: Workspace + type: string + project: + description: Filter by project URN. + title: Project + type: string + name: + description: Filter by name. + title: Name + type: string + description: + description: Filter by description. + title: Description + type: string + created_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by creation date. + updated_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by update date. + title: PromptFilter + type: object + PromptMessage: + properties: + role: + allOf: + - $ref: '#/components/schemas/PromptMessageRole' + description: The role of the message author. + content: + type: string + title: Content + description: Templated message content. May contain template variables. + type: object + required: + - role + - content + title: PromptMessage + description: 'A single templated message in a chat prompt. + + + ``content`` is a Jinja2 template body that may reference the prompt''s + + declared ``input_variables`` (e.g. ``{{ topic }}``).' + PromptMessageRole: + type: string + enum: + - system + - developer + - user + - assistant + title: PromptMessageRole + description: 'Role of a message author in a chat prompt. + + + Follows the OpenAI chat schema the Inference Gateway speaks + + (``/v1/chat/completions``).' + PromptSort: + type: string + enum: + - name + - -name + - created_at + - -created_at + - updated_at + - -updated_at + title: PromptSort + description: Sort fields for Prompt queries. + PromptsPage: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + pagination: + allOf: + - $ref: '#/components/schemas/PaginationData' + description: Pagination information. + sort: + title: Sort + description: The field on which the results are sorted. + type: string + filter: + title: Filter + description: Filtering information. + additionalProperties: true + type: object + type: object + required: + - data + title: PromptsPage RailStatus: properties: status: @@ -16870,6 +17380,56 @@ components: This endpoint supports partial updates for fields managed by Models Controller.' + UpdatePromptRequest: + properties: + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + title: UpdatePromptRequest + description: 'Request model for replacing a Prompt''s mutable fields (full update). + + + The prompt name and workspace come from the URL path and cannot be changed.' UpdateVirtualModelRequest: properties: default_model_entity: diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml index d8a24cd46b..e000ff3ae4 100644 --- a/sdk/stainless.yaml +++ b/sdk/stainless.yaml @@ -462,6 +462,24 @@ resources: update: put /apis/models/v2/workspaces/{workspace}/providers/{name} delete: delete /apis/models/v2/workspaces/{workspace}/providers/{name} update_status: put /apis/models/v2/workspaces/{workspace}/providers/{name}/status + prompts: + models: + chat_completion_tool: ChatCompletionTool + create_prompt_request: CreatePromptRequest + function_definition: FunctionDefinition + prompt: Prompt + prompt_filter: PromptFilter + prompt_message: PromptMessage + prompt_message_role: PromptMessageRole + prompt_sort: PromptSort + prompts_page: PromptsPage + update_prompt_request: UpdatePromptRequest + methods: + list: get /apis/models/v2/workspaces/{workspace}/prompts + create: post /apis/models/v2/workspaces/{workspace}/prompts + retrieve: get /apis/models/v2/workspaces/{workspace}/prompts/{name} + update: put /apis/models/v2/workspaces/{workspace}/prompts/{name} + delete: delete /apis/models/v2/workspaces/{workspace}/prompts/{name} gateway: subresources: openai: diff --git a/services/core/models/src/nmp/core/models/api/dependencies.py b/services/core/models/src/nmp/core/models/api/dependencies.py index d43192cb6e..b50a2ade03 100644 --- a/services/core/models/src/nmp/core/models/api/dependencies.py +++ b/services/core/models/src/nmp/core/models/api/dependencies.py @@ -12,6 +12,7 @@ from nmp.core.models.api.service.model_deployment_service import ModelDeploymentService from nmp.core.models.api.service.model_entity_service import ModelEntityService from nmp.core.models.api.service.model_provider_service import ModelProviderService +from nmp.core.models.api.service.prompt_service import PromptService def get_model_entity_service( @@ -35,6 +36,13 @@ def get_model_provider_service( return ModelProviderService(entity_client) +def get_prompt_service( + entity_client: EntityClient = Depends(get_entity_client), +) -> PromptService: + """Dependency to get PromptService instance.""" + return PromptService(entity_client) + + def get_model_deployment_config_service( entity_client: EntityClient = Depends(get_entity_client), ) -> ModelDeploymentConfigService: diff --git a/services/core/models/src/nmp/core/models/api/service/prompt_service.py b/services/core/models/src/nmp/core/models/api/service/prompt_service.py new file mode 100644 index 0000000000..4754f91efb --- /dev/null +++ b/services/core/models/src/nmp/core/models/api/service/prompt_service.py @@ -0,0 +1,175 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Service layer for Prompt operations using EntityClient.""" + +import logging + +from nmp.common.api.common import Page, PaginationData +from nmp.common.api.filter import FilterOperation +from nmp.common.entities.client import EntityClient, EntityConflictError, EntityNotFoundError +from nmp.core.models.entities import Prompt as PromptEntity +from nmp.core.models.schemas import ( + CreatePromptRequest, + DeletePromptRequest, + GetPromptRequest, + Prompt, + UpdatePromptRequest, +) + +logger = logging.getLogger(__name__) + + +def _entity_to_schema(entity: PromptEntity) -> Prompt: + """Convert an EntityBase Prompt to the API schema.""" + return Prompt( + id=entity.id, + name=entity.name, + workspace=entity.workspace, + project=entity.project, + description=entity.description, + messages=entity.messages, + input_variables=entity.input_variables, + tools=entity.tools, + tool_choice=entity.tool_choice, + response_format=entity.response_format, + inference_params=entity.inference_params, + tags=entity.tags, + created_at=entity.created_at, + updated_at=entity.updated_at, + ) + + +class PromptService: + """Service layer for Prompt operations.""" + + def __init__(self, entity_client: EntityClient): + self.entity_client = entity_client + + async def create_prompt(self, request: CreatePromptRequest, workspace: str) -> Prompt: + """Create a new prompt.""" + logger.debug("Creating prompt", extra={"workspace": workspace, "prompt_name": request.name}) + + try: + await self.entity_client.get(PromptEntity, name=request.name, workspace=workspace) + logger.warning("Prompt already exists", extra={"workspace": workspace, "prompt_name": request.name}) + raise ValueError(f"Prompt with name '{request.name}' already exists in workspace '{workspace}'") + except EntityNotFoundError: + pass # Expected - prompt doesn't exist, proceed with creation + + entity = PromptEntity( + name=request.name, + workspace=workspace, + project=request.project, + description=request.description, + messages=request.messages, + input_variables=request.input_variables, + tools=request.tools, + tool_choice=request.tool_choice, + response_format=request.response_format, + inference_params=request.inference_params, + tags=request.tags or [], + ) + + try: + created = await self.entity_client.create(entity) + logger.info("Prompt created", extra={"workspace": created.workspace, "prompt_name": created.name}) + return _entity_to_schema(created) + except EntityConflictError as e: + logger.warning( + "Prompt already exists (conflict)", + extra={"workspace": workspace, "prompt_name": request.name}, + ) + raise ValueError(f"Prompt with name '{request.name}' already exists in workspace '{workspace}'") from e + + async def get_prompt(self, request: GetPromptRequest) -> Prompt | None: + """Get a prompt by workspace and name.""" + logger.debug("Getting prompt", extra={"workspace": request.workspace, "prompt_name": request.name}) + + try: + entity = await self.entity_client.get( + PromptEntity, + workspace=request.workspace, + name=request.name, + ) + return _entity_to_schema(entity) + except EntityNotFoundError: + logger.debug("Prompt not found", extra={"workspace": request.workspace, "prompt_name": request.name}) + return None + + async def list_prompts( + self, + workspace: str, + page: int = 1, + page_size: int = 100, + sort: str | None = None, + filter_operation: FilterOperation | None = None, + ) -> Page[Prompt]: + """List prompts with filtering and pagination.""" + logger.debug("Listing prompts", extra={"page": page, "page_size": page_size, "sort": sort}) + + result = await self.entity_client.list( + PromptEntity, + workspace=workspace, + filter_operation=filter_operation, + sort=sort, + page=page, + page_size=page_size, + ) + + prompts = [_entity_to_schema(entity) for entity in result.data] + + return Page( + data=prompts, + pagination=PaginationData( + page=result.pagination.page, + page_size=result.pagination.page_size, + current_page_size=len(prompts), + total_pages=result.pagination.total_pages, + total_results=result.pagination.total_results, + ), + sort=sort, + filter=None, + ) + + async def update_prompt(self, workspace: str, name: str, request: UpdatePromptRequest) -> Prompt | None: + """Replace a prompt's mutable fields (full update). Returns None if not found.""" + logger.debug("Updating prompt", extra={"workspace": workspace, "prompt_name": name}) + + try: + entity = await self.entity_client.get(PromptEntity, workspace=workspace, name=name) + except EntityNotFoundError: + logger.warning("Prompt not found for update", extra={"workspace": workspace, "prompt_name": name}) + return None + + entity.project = request.project + entity.description = request.description + entity.messages = request.messages + entity.input_variables = request.input_variables + entity.tools = request.tools + entity.tool_choice = request.tool_choice + entity.response_format = request.response_format + entity.inference_params = request.inference_params + if request.tags is not None: + entity.tags = request.tags + + updated = await self.entity_client.update(entity) + logger.info("Prompt updated", extra={"workspace": updated.workspace, "prompt_name": updated.name}) + return _entity_to_schema(updated) + + async def delete_prompt(self, request: DeletePromptRequest) -> bool: + """Delete a prompt by workspace and name. Returns False if not found.""" + logger.debug("Deleting prompt", extra={"workspace": request.workspace, "prompt_name": request.name}) + + try: + await self.entity_client.get(PromptEntity, workspace=request.workspace, name=request.name) + except EntityNotFoundError: + logger.warning( + "Prompt not found for deletion", + extra={"workspace": request.workspace, "prompt_name": request.name}, + ) + return False + + await self.entity_client.delete(PromptEntity, request.name, workspace=request.workspace) + logger.info("Prompt deleted", extra={"workspace": request.workspace, "prompt_name": request.name}) + return True diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py new file mode 100644 index 0000000000..84cbde3e73 --- /dev/null +++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py @@ -0,0 +1,186 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import logging + +from fastapi import APIRouter, Depends, HTTPException, Query, status +from nmp.common.api.common import Page +from nmp.common.api.parsed_filter import ParsedFilter, make_filter_dep +from nmp.common.api.utils import generate_openapi_extra_params +from nmp.common.entities.client import EntityValidationError +from nmp.core.models.api.dependencies import get_prompt_service +from nmp.core.models.api.service.prompt_service import PromptService +from nmp.core.models.schemas import ( + CreatePromptRequest, + DeletePromptRequest, + GetPromptRequest, + Prompt, + PromptFilter, + PromptSort, + UpdatePromptRequest, +) + +logger = logging.getLogger(__name__) + +router = APIRouter() + + +@router.get( + "/v2/workspaces/{workspace}/prompts", + summary="List Prompts By Workspace", + response_description="Return prompts for a workspace", + status_code=status.HTTP_200_OK, + response_model=Page[Prompt], + response_model_exclude_none=True, + openapi_extra=generate_openapi_extra_params( + filter_schema=PromptFilter, + filter_description=("Filter prompts by workspace, project, name, description, created_at, and updated_at."), + ), +) +async def list_prompts( + workspace: str, + page: int = Query(default=1, description="Page number."), + page_size: int = Query(default=100, description="Page size."), + sort: PromptSort = Query( + default=PromptSort.CREATED_AT_ASC, + description="The field to sort by. To sort in decreasing order, use `-` in front of the field name.", + ), + parsed_filter: ParsedFilter = Depends(make_filter_dep(PromptFilter)), + service: PromptService = Depends(get_prompt_service), +) -> Page[Prompt]: + """List prompts for a specific workspace.""" + # Extract workspace — inject from path param if not in filter + filter_workspace = parsed_filter.remove("workspace") or workspace + try: + return await service.list_prompts( + workspace=filter_workspace, + page=page, + page_size=page_size, + sort=sort, + filter_operation=parsed_filter.operation, + ) + except Exception as e: + logger.exception(f"Failed to list prompts for workspace {workspace}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.post( + "/v2/workspaces/{workspace}/prompts", + summary="Create Prompt", + response_description="Create a new prompt", + status_code=status.HTTP_201_CREATED, +) +async def create_prompt( + workspace: str, + request: CreatePromptRequest, + service: PromptService = Depends(get_prompt_service), +) -> Prompt: + """Create a new prompt.""" + logger.info(f"Creating prompt: {workspace}/{request.name}") + try: + return await service.create_prompt(request, workspace) + except EntityValidationError as e: + logger.warning(f"Entity store validation error during prompt creation: {e}") + raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) + except ValueError as e: + if "already exists" in str(e).lower(): + logger.warning(f"Prompt already exists: {workspace}/{request.name}") + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"Prompt with workspace '{workspace}' and name '{request.name}' already exists", + ) + logger.warning(f"Prompt creation validation error: {e}") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + except Exception as e: + logger.exception("Failed to create prompt") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.get( + "/v2/workspaces/{workspace}/prompts/{name}", + summary="Get Prompt", + response_description="Return prompt details", + status_code=status.HTTP_200_OK, +) +async def get_prompt( + workspace: str, + name: str, + service: PromptService = Depends(get_prompt_service), +) -> Prompt: + """Get a prompt by workspace and name.""" + logger.debug(f"Getting prompt: {workspace}/{name}") + try: + prompt = await service.get_prompt(GetPromptRequest(workspace=workspace, name=name)) + if not prompt: + logger.warning(f"Prompt not found: {workspace}/{name}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Prompt not found: {workspace}/{name}", + ) + return prompt + except HTTPException: + raise + except Exception as e: + logger.exception(f"Failed to get prompt {workspace}/{name}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.put( + "/v2/workspaces/{workspace}/prompts/{name}", + summary="Update Prompt", + response_description="Update an existing prompt", + status_code=status.HTTP_200_OK, +) +async def update_prompt( + workspace: str, + name: str, + request: UpdatePromptRequest, + service: PromptService = Depends(get_prompt_service), +) -> Prompt: + """Update an existing prompt (full replacement of mutable fields).""" + logger.debug(f"Updating prompt: {workspace}/{name}") + try: + prompt = await service.update_prompt(workspace, name, request) + if not prompt: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Prompt not found: {workspace}/{name}", + ) + return prompt + except EntityValidationError as e: + logger.warning(f"Entity store validation error during prompt update: {e}") + raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) + except HTTPException: + raise + except Exception as e: + logger.exception("Failed to update prompt") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +@router.delete( + "/v2/workspaces/{workspace}/prompts/{name}", + summary="Delete Prompt", + response_description="Delete a prompt", + status_code=status.HTTP_204_NO_CONTENT, +) +async def delete_prompt( + workspace: str, + name: str, + service: PromptService = Depends(get_prompt_service), +): + """Delete a prompt by workspace and name.""" + logger.info(f"Deleting prompt: {workspace}/{name}") + try: + deleted = await service.delete_prompt(DeletePromptRequest(workspace=workspace, name=name)) + if not deleted: + logger.warning(f"Prompt not found for deletion: {workspace}/{name}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Prompt not found: {workspace}/{name}", + ) + return None + except HTTPException: + raise + except Exception as e: + logger.exception(f"Failed to delete prompt {workspace}/{name}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) diff --git a/services/core/models/src/nmp/core/models/entities.py b/services/core/models/src/nmp/core/models/entities.py index 375395ca20..18bbdd2767 100644 --- a/services/core/models/src/nmp/core/models/entities.py +++ b/services/core/models/src/nmp/core/models/entities.py @@ -8,6 +8,7 @@ from nmp.common.auth import AuthContext from nmp.common.entities import constants from nmp.common.entities.client import EntityBase +from nmp.common.inference import InferenceParams from nmp.core.models.constants import ( MODEL_REF_MAX_LEN, MODEL_REF_PATTERN_DESCRIPTION, @@ -16,6 +17,7 @@ from nmp.core.models.schemas import ( APIEndpointData, BackendFormat, + ChatCompletionTool, FinetuningType, Lora, ModelDeploymentStatus, @@ -23,6 +25,7 @@ ModelSpec, NIMDeployment, PromptData, + PromptMessage, ServedModelMapping, ) from pydantic import Field, PrivateAttr, computed_field, field_validator, model_validator @@ -341,3 +344,53 @@ class ModelDeploymentConfig(EntityBase): description="Optional description of the deployment configuration.", max_length=1000, ) + + +class Prompt(EntityBase): + """A reusable, stored chat prompt, addressed by workspace/name. + + Captures the messages, declared template variables, optional tool definitions, + and default inference parameters needed to invoke a model through the + Inference Gateway. + """ + + __entity_type__: ClassVar[str] = "prompt" + + project: str | None = Field( + default=None, + description="The URN of the project associated with this prompt.", + max_length=constants.MAX_LENGTH_255, + ) + description: str | None = Field( + default=None, + description="Optional description of the prompt.", + max_length=1000, + ) + messages: list[PromptMessage] = Field( + default_factory=list, + description="Ordered list of chat messages that make up the prompt.", + ) + input_variables: list[str] = Field( + default_factory=list, + description="Names of the Jinja2 template variables the prompt expects.", + ) + tools: list[ChatCompletionTool] | None = Field( + default=None, + description="Optional OpenAI-compatible tool definitions to send with the prompt.", + ) + tool_choice: str | dict[str, Any] | None = Field( + default=None, + description="Controls which (if any) tool is called: 'none', 'auto', 'required', or a named-tool object.", + ) + response_format: dict[str, Any] | None = Field( + default=None, + description="Optional OpenAI-compatible response_format, e.g. a json_schema structured-output spec.", + ) + inference_params: InferenceParams | None = Field( + default=None, + description="Optional default model and sampling parameters (temperature, top_p, max_tokens, ...).", + ) + tags: list[str] = Field( + default_factory=list, + description="Optional free-form tags for organizing prompts.", + ) diff --git a/services/core/models/src/nmp/core/models/schemas.py b/services/core/models/src/nmp/core/models/schemas.py index 889f62d9a3..47e060ba49 100644 --- a/services/core/models/src/nmp/core/models/schemas.py +++ b/services/core/models/src/nmp/core/models/schemas.py @@ -4,7 +4,7 @@ from abc import ABC from datetime import datetime from enum import Enum, StrEnum -from typing import Annotated, Any, Dict, List, Optional, Union +from typing import Annotated, Any, Dict, List, Literal, Optional, Union from jinja2 import Environment from jinja2 import nodes as jinja_nodes @@ -699,6 +699,205 @@ class DeleteModelProviderRequest(BaseModel): ) +# ============================================================================ +# Prompt Schemas +# ============================================================================ + + +class PromptMessageRole(StrEnum): + """Role of a message author in a chat prompt. + + Follows the OpenAI chat schema the Inference Gateway speaks + (``/v1/chat/completions``). + """ + + SYSTEM = "system" + DEVELOPER = "developer" + USER = "user" + ASSISTANT = "assistant" + + +class PromptMessage(BaseModel): + """A single templated message in a chat prompt. + + ``content`` is a Jinja2 template body that may reference the prompt's + declared ``input_variables`` (e.g. ``{{ topic }}``). + """ + + role: PromptMessageRole = Field(description="The role of the message author.") + content: str = Field(description="Templated message content. May contain template variables.") + + +class FunctionDefinition(BaseModel): + """An OpenAI-compatible function definition for tool calling. + + Mirrors the ``function`` object the Inference Gateway forwards to + OpenAI-compatible backends. + """ + + name: str = Field( + description="The name of the function to be called.", + max_length=constants.MAX_LENGTH_255, + ) + description: Optional[str] = Field( + default=None, + description="A description of what the function does, used by the model to decide when and how to call it.", + ) + parameters: Optional[Dict[str, Any]] = Field( + default=None, + description="The parameters the function accepts, described as a JSON Schema object.", + ) + strict: Optional[bool] = Field( + default=None, + description="Whether to enforce strict schema adherence when generating the function call.", + ) + + +class ChatCompletionTool(BaseModel): + """An OpenAI-compatible tool definition (currently always a function tool).""" + + type: Literal["function"] = Field( + default="function", + description="The type of the tool. Currently only 'function' is supported.", + ) + function: FunctionDefinition = Field(description="The function definition for this tool.") + + +class Prompt(ModelEntityBaseModel): + """A reusable, stored chat prompt. + + A Prompt captures the messages, declared template variables, optional tool + definitions, and default inference parameters needed to invoke a model + through the Inference Gateway. The unique identifier is workspace/name. + """ + + id: str = Field( + default_factory=lambda: get_model_id("prompt"), + description="Unique identifier for the prompt.", + ) + description: Optional[str] = Field( + default=None, + description="Optional description of the prompt.", + max_length=1000, + ) + messages: List[PromptMessage] = Field( + default_factory=list, + description="Ordered list of chat messages that make up the prompt.", + ) + input_variables: List[str] = Field( + default_factory=list, + description="Names of the Jinja2 template variables the prompt expects.", + ) + tools: Optional[List[ChatCompletionTool]] = Field( + default=None, + description="Optional OpenAI-compatible tool definitions to send with the prompt.", + ) + tool_choice: Optional[Union[str, Dict[str, Any]]] = Field( + default=None, + description="Controls which (if any) tool is called: 'none', 'auto', 'required', or a named-tool object.", + ) + response_format: Optional[Dict[str, Any]] = Field( + default=None, + description="Optional OpenAI-compatible response_format, e.g. a json_schema structured-output spec.", + ) + inference_params: Optional[InferenceParams] = Field( + default=None, + description="Optional default model and sampling parameters (temperature, top_p, max_tokens, ...).", + ) + tags: List[str] = Field( + default_factory=list, + description="Optional free-form tags for organizing prompts.", + ) + + +class PromptSort(StrEnum): + """Sort fields for Prompt queries.""" + + NAME_ASC = "name" + NAME_DESC = "-name" + CREATED_AT_ASC = "created_at" + CREATED_AT_DESC = "-created_at" + UPDATED_AT_ASC = "updated_at" + UPDATED_AT_DESC = "-updated_at" + + +class CreatePromptRequest(BaseModel): + """Request model for creating a Prompt.""" + + name: str = Field( + description=f"Name of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH, + examples=["support-bot-system", "summarizer"], + ) + project: Optional[str] = Field( + default=None, + description="The URN of the project associated with this prompt.", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH_SLASH, + ) + description: Optional[str] = Field(default=None, max_length=1000) + messages: List[PromptMessage] = Field(default_factory=list) + input_variables: List[str] = Field(default_factory=list) + tools: Optional[List[ChatCompletionTool]] = Field(default=None) + tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(default=None) + response_format: Optional[Dict[str, Any]] = Field(default=None) + inference_params: Optional[InferenceParams] = Field(default=None) + tags: Optional[List[str]] = Field(default=None) + + +class UpdatePromptRequest(BaseModel): + """Request model for replacing a Prompt's mutable fields (full update). + + The prompt name and workspace come from the URL path and cannot be changed. + """ + + project: Optional[str] = Field( + default=None, + description="The URN of the project associated with this prompt.", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH_SLASH, + ) + description: Optional[str] = Field(default=None, max_length=1000) + messages: List[PromptMessage] = Field(default_factory=list) + input_variables: List[str] = Field(default_factory=list) + tools: Optional[List[ChatCompletionTool]] = Field(default=None) + tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(default=None) + response_format: Optional[Dict[str, Any]] = Field(default=None) + inference_params: Optional[InferenceParams] = Field(default=None) + tags: Optional[List[str]] = Field(default=None) + + +class GetPromptRequest(BaseModel): + """Request model for getting a Prompt.""" + + workspace: str = Field( + description=f"The workspace of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH, + ) + name: str = Field( + description=f"Name of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH, + ) + + +class DeletePromptRequest(BaseModel): + """Request model for deleting a Prompt.""" + + workspace: str = Field( + description=f"The workspace of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH, + ) + name: str = Field( + description=f"Name of the prompt. {constants.REGEX_WORD_CHARACTER_DOT_DASH_DESCRIPTION}", + max_length=constants.MAX_LENGTH_255, + pattern=constants.REGEX_WORD_CHARACTER_DOT_DASH, + ) + + # ============================================================================ # Model Entity Schemas # ============================================================================ @@ -1442,6 +1641,17 @@ class ListModelDeploymentsRequest(BaseModel): # ============================================================================ +class PromptFilter(Filter): + """Filter for Prompt queries.""" + + workspace: Optional[str] = Field(None, description="Filter by workspace.") + project: Optional[str] = Field(None, description="Filter by project URN.") + name: Optional[str] = Field(None, description="Filter by name.") + description: Optional[str] = Field(None, description="Filter by description.") + created_at: Optional[DatetimeFilter] = Field(None, description="Filter by creation date.") + updated_at: Optional[DatetimeFilter] = Field(None, description="Filter by update date.") + + class ModelProviderFilter(Filter): """Filter for ModelProvider queries.""" diff --git a/services/core/models/src/nmp/core/models/service.py b/services/core/models/src/nmp/core/models/service.py index 2ff89fa6ea..0c39e81e02 100644 --- a/services/core/models/src/nmp/core/models/service.py +++ b/services/core/models/src/nmp/core/models/service.py @@ -35,7 +35,7 @@ def description(self) -> str: def get_routers(self) -> List[RouterConfig]: """Return routers for the models service.""" - from nmp.core.models.api.v2 import adapters, deployment_configs, deployments, models, providers + from nmp.core.models.api.v2 import adapters, deployment_configs, deployments, models, prompts, providers return [ RouterConfig( @@ -63,6 +63,11 @@ def get_routers(self) -> List[RouterConfig]: tag="ModelProviders", description="Operations related to model providers.", ), + RouterConfig( + prompts.router, + tag="Prompts", + description="CRUD operations for reusable chat prompt entities.", + ), ] def configure_app(self) -> None: diff --git a/services/core/models/tests/unit/api/test_prompts_api.py b/services/core/models/tests/unit/api/test_prompts_api.py new file mode 100644 index 0000000000..fd30eabba8 --- /dev/null +++ b/services/core/models/tests/unit/api/test_prompts_api.py @@ -0,0 +1,244 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for Prompt API endpoints.""" + +from datetime import datetime +from unittest.mock import AsyncMock, Mock + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient +from nmp.common.api.common import Page, PaginationData +from nmp.common.entities.client import EntityValidationError +from nmp.core.models.api.service.prompt_service import PromptService +from nmp.core.models.api.v2.prompts import router +from nmp.core.models.schemas import Prompt, PromptMessage, PromptMessageRole + + +@pytest.fixture +def mock_prompt_service(): + """Create a mock PromptService.""" + service = Mock(spec=PromptService) + service.list_prompts = AsyncMock() + service.get_prompt = AsyncMock() + service.create_prompt = AsyncMock() + service.update_prompt = AsyncMock() + service.delete_prompt = AsyncMock() + return service + + +@pytest.fixture +def test_app(mock_prompt_service): + """Create a FastAPI test app with the prompt service dependency overridden.""" + from nmp.core.models.api.dependencies import get_prompt_service + + app = FastAPI() + app.dependency_overrides[get_prompt_service] = lambda: mock_prompt_service + app.include_router(router, prefix="/apis/models") + return app + + +@pytest.fixture +def client(test_app): + return TestClient(test_app) + + +@pytest.fixture +def sample_prompt(): + return Prompt( + id="prompt-1", + name="summarizer", + workspace="default", + description="A summarization prompt", + messages=[PromptMessage(role=PromptMessageRole.USER, content="Summarize: {{ document }}")], + input_variables=["document"], + created_at=datetime.now(), + updated_at=datetime.now(), + ) + + +@pytest.fixture +def sample_page(sample_prompt): + return Page( + data=[sample_prompt], + pagination=PaginationData( + page=1, + page_size=100, + current_page_size=1, + total_results=1, + total_pages=1, + ), + sort="created_at", + filter=None, + ) + + +def test_list_prompts_default_parameters(client, mock_prompt_service, sample_page): + mock_prompt_service.list_prompts.return_value = sample_page + + response = client.get("/apis/models/v2/workspaces/default/prompts") + + assert response.status_code == 200 + call_args = mock_prompt_service.list_prompts.call_args + assert call_args.kwargs["page"] == 1 + assert call_args.kwargs["page_size"] == 100 + assert call_args.kwargs["sort"] == "created_at" + assert call_args.kwargs["workspace"] == "default" + + +def test_list_prompts_with_sort(client, mock_prompt_service, sample_page): + mock_prompt_service.list_prompts.return_value = sample_page + + response = client.get("/apis/models/v2/workspaces/default/prompts?sort=-name") + + assert response.status_code == 200 + assert mock_prompt_service.list_prompts.call_args.kwargs["sort"] == "-name" + + +def test_list_prompts_with_name_filter(client, mock_prompt_service, sample_page): + mock_prompt_service.list_prompts.return_value = sample_page + + response = client.get("/apis/models/v2/workspaces/default/prompts?filter[name][]=summarizer") + + assert response.status_code == 200 + assert mock_prompt_service.list_prompts.call_args.kwargs.get("filter_operation") is not None + + +def test_list_prompts_response_structure(client, mock_prompt_service, sample_page): + mock_prompt_service.list_prompts.return_value = sample_page + + response = client.get("/apis/models/v2/workspaces/default/prompts") + + assert response.status_code == 200 + data = response.json() + assert "data" in data + assert "pagination" in data + assert len(data["data"]) == 1 + assert data["data"][0]["name"] == "summarizer" + + +def test_create_prompt_success(client, mock_prompt_service, sample_prompt): + mock_prompt_service.create_prompt.return_value = sample_prompt + + request_body = { + "name": "summarizer", + "messages": [{"role": "user", "content": "Summarize: {{ document }}"}], + "input_variables": ["document"], + } + + response = client.post("/apis/models/v2/workspaces/default/prompts", json=request_body) + + assert response.status_code == 201 + data = response.json() + assert data["name"] == "summarizer" + assert data["messages"][0]["role"] == "user" + + +def test_create_prompt_with_tools(client, mock_prompt_service, sample_prompt): + mock_prompt_service.create_prompt.return_value = sample_prompt + + request_body = { + "name": "weather-bot", + "messages": [{"role": "system", "content": "You can call tools."}], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather", + "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}, + }, + } + ], + "tool_choice": "auto", + } + + response = client.post("/apis/models/v2/workspaces/default/prompts", json=request_body) + + assert response.status_code == 201 + # The request validated and reached the service with parsed tools. + sent_request = mock_prompt_service.create_prompt.call_args[0][0] + assert sent_request.tools[0].function.name == "get_weather" + assert sent_request.tool_choice == "auto" + + +def test_create_prompt_conflict_returns_409(client, mock_prompt_service): + mock_prompt_service.create_prompt.side_effect = ValueError( + "Prompt with name 'summarizer' already exists in workspace 'default'" + ) + + response = client.post( + "/apis/models/v2/workspaces/default/prompts", + json={"name": "summarizer"}, + ) + + assert response.status_code == 409 + + +def test_create_prompt_entity_validation_error_returns_422(client, mock_prompt_service): + mock_prompt_service.create_prompt.side_effect = EntityValidationError("name must match pattern") + + response = client.post( + "/apis/models/v2/workspaces/default/prompts", + json={"name": "summarizer"}, + ) + + assert response.status_code == 422 + assert "name must match pattern" in response.json()["detail"] + + +def test_get_prompt_success(client, mock_prompt_service, sample_prompt): + mock_prompt_service.get_prompt.return_value = sample_prompt + + response = client.get("/apis/models/v2/workspaces/default/prompts/summarizer") + + assert response.status_code == 200 + assert response.json()["name"] == "summarizer" + + +def test_get_prompt_not_found_returns_404(client, mock_prompt_service): + mock_prompt_service.get_prompt.return_value = None + + response = client.get("/apis/models/v2/workspaces/default/prompts/missing") + + assert response.status_code == 404 + + +def test_update_prompt_success(client, mock_prompt_service, sample_prompt): + mock_prompt_service.update_prompt.return_value = sample_prompt + + response = client.put( + "/apis/models/v2/workspaces/default/prompts/summarizer", + json={"description": "updated", "messages": [{"role": "user", "content": "hi"}]}, + ) + + assert response.status_code == 200 + assert response.json()["name"] == "summarizer" + + +def test_update_prompt_not_found_returns_404(client, mock_prompt_service): + mock_prompt_service.update_prompt.return_value = None + + response = client.put( + "/apis/models/v2/workspaces/default/prompts/missing", + json={"description": "updated"}, + ) + + assert response.status_code == 404 + + +def test_delete_prompt_success(client, mock_prompt_service): + mock_prompt_service.delete_prompt.return_value = True + + response = client.delete("/apis/models/v2/workspaces/default/prompts/summarizer") + + assert response.status_code == 204 + + +def test_delete_prompt_not_found_returns_404(client, mock_prompt_service): + mock_prompt_service.delete_prompt.return_value = False + + response = client.delete("/apis/models/v2/workspaces/default/prompts/missing") + + assert response.status_code == 404 diff --git a/services/core/models/tests/unit/test_prompt_service_unit.py b/services/core/models/tests/unit/test_prompt_service_unit.py new file mode 100644 index 0000000000..68531fe3b8 --- /dev/null +++ b/services/core/models/tests/unit/test_prompt_service_unit.py @@ -0,0 +1,231 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Unit tests for Prompt service with mocked EntityClient.""" + +from datetime import datetime, timezone +from typing import Any +from unittest.mock import AsyncMock + +import pytest +from nmp.common.entities.client import EntityClient, EntityNotFoundError +from nmp.core.models.api.service.prompt_service import PromptService +from nmp.core.models.entities import Prompt as PromptEntity +from nmp.core.models.schemas import ( + ChatCompletionTool, + CreatePromptRequest, + DeletePromptRequest, + FunctionDefinition, + GetPromptRequest, + Prompt, + PromptMessage, + PromptMessageRole, + UpdatePromptRequest, +) + + +def create_prompt_entity( + entity_id: str = "prompt-id-123", + created_at: datetime | None = None, + updated_at: datetime | None = None, + **kwargs: Any, +) -> PromptEntity: + """Helper to create a PromptEntity with the store-managed private attributes set.""" + entity = PromptEntity(**kwargs) + entity._id = entity_id + entity._created_at = created_at or datetime.now(timezone.utc) + entity._updated_at = updated_at or datetime.now(timezone.utc) + return entity + + +@pytest.fixture +def mock_entity_client() -> AsyncMock: + """Create a mock EntityClient for testing.""" + return AsyncMock(spec=EntityClient) + + +@pytest.fixture +def prompt_service(mock_entity_client): + """Create a PromptService with mocked EntityClient.""" + return PromptService(mock_entity_client) + + +@pytest.fixture +def sample_messages() -> list[PromptMessage]: + return [ + PromptMessage(role=PromptMessageRole.SYSTEM, content="You are a helpful {{ persona }}."), + PromptMessage(role=PromptMessageRole.USER, content="Summarize: {{ document }}"), + ] + + +@pytest.fixture +def sample_tools() -> list[ChatCompletionTool]: + return [ + ChatCompletionTool( + function=FunctionDefinition( + name="get_weather", + description="Get the current weather for a city.", + parameters={"type": "object", "properties": {"city": {"type": "string"}}}, + ) + ) + ] + + +@pytest.fixture +def sample_create_request(sample_messages, sample_tools) -> CreatePromptRequest: + return CreatePromptRequest( + name="summarizer", + project="test-project", + description="A summarization prompt", + messages=sample_messages, + input_variables=["persona", "document"], + tools=sample_tools, + tool_choice="auto", + tags=["nlp", "summarize"], + ) + + +@pytest.fixture +def sample_prompt_entity(sample_messages, sample_tools) -> PromptEntity: + return create_prompt_entity( + name="summarizer", + workspace="default", + project="test-project", + description="A summarization prompt", + messages=sample_messages, + input_variables=["persona", "document"], + tools=sample_tools, + tool_choice="auto", + tags=["nlp", "summarize"], + ) + + +@pytest.mark.asyncio +async def test_create_prompt_success(prompt_service, mock_entity_client, sample_create_request, sample_prompt_entity): + """Test successful prompt creation.""" + mock_entity_client.get.side_effect = EntityNotFoundError("Entity not found") + mock_entity_client.create.return_value = sample_prompt_entity + + result = await prompt_service.create_prompt(sample_create_request, "default") + + assert isinstance(result, Prompt) + assert result.name == "summarizer" + assert result.workspace == "default" + assert result.input_variables == ["persona", "document"] + assert result.tools is not None + assert result.tools[0].function.name == "get_weather" + mock_entity_client.create.assert_called_once() + created_entity = mock_entity_client.create.call_args[0][0] + assert isinstance(created_entity, PromptEntity) + assert created_entity.name == "summarizer" + assert len(created_entity.messages) == 2 + + +@pytest.mark.asyncio +async def test_create_prompt_conflict_raises_value_error( + prompt_service, mock_entity_client, sample_create_request, sample_prompt_entity +): + """Test that an existing prompt causes a ValueError and no create call.""" + mock_entity_client.get.return_value = sample_prompt_entity # already exists + + with pytest.raises(ValueError, match="already exists"): + await prompt_service.create_prompt(sample_create_request, "default") + + mock_entity_client.create.assert_not_called() + + +@pytest.mark.asyncio +async def test_get_prompt_found(prompt_service, mock_entity_client, sample_prompt_entity): + """Test retrieving an existing prompt.""" + mock_entity_client.get.return_value = sample_prompt_entity + + result = await prompt_service.get_prompt(GetPromptRequest(workspace="default", name="summarizer")) + + assert result is not None + assert result.name == "summarizer" + assert result.tool_choice == "auto" + + +@pytest.mark.asyncio +async def test_get_prompt_not_found(prompt_service, mock_entity_client): + """Test that a missing prompt returns None.""" + mock_entity_client.get.side_effect = EntityNotFoundError("not found") + + result = await prompt_service.get_prompt(GetPromptRequest(workspace="default", name="missing")) + + assert result is None + + +@pytest.mark.asyncio +async def test_list_prompts(prompt_service, mock_entity_client, sample_prompt_entity): + """Test listing prompts returns a Page with mapped schemas.""" + mock_result = AsyncMock() + mock_result.data = [sample_prompt_entity] + mock_result.pagination = AsyncMock(page=1, page_size=100, total_pages=1, total_results=1) + mock_entity_client.list.return_value = mock_result + + page = await prompt_service.list_prompts(workspace="default", page=1, page_size=100, sort="created_at") + + assert page.pagination.total_results == 1 + assert page.pagination.current_page_size == 1 + assert len(page.data) == 1 + assert page.data[0].name == "summarizer" + + +@pytest.mark.asyncio +async def test_update_prompt_success(prompt_service, mock_entity_client, sample_prompt_entity): + """Test updating an existing prompt replaces mutable fields.""" + mock_entity_client.get.return_value = sample_prompt_entity + mock_entity_client.update.return_value = sample_prompt_entity + + request = UpdatePromptRequest( + description="Updated description", + messages=[PromptMessage(role=PromptMessageRole.USER, content="New {{ x }}")], + input_variables=["x"], + tags=["updated"], + ) + + result = await prompt_service.update_prompt("default", "summarizer", request) + + assert result is not None + mock_entity_client.update.assert_called_once() + updated_entity = mock_entity_client.update.call_args[0][0] + assert updated_entity.description == "Updated description" + assert updated_entity.input_variables == ["x"] + assert updated_entity.tags == ["updated"] + # Full replacement clears fields not present in the request + assert updated_entity.tools is None + assert updated_entity.tool_choice is None + + +@pytest.mark.asyncio +async def test_update_prompt_not_found(prompt_service, mock_entity_client): + """Test that updating a missing prompt returns None.""" + mock_entity_client.get.side_effect = EntityNotFoundError("not found") + + result = await prompt_service.update_prompt("default", "missing", UpdatePromptRequest()) + + assert result is None + mock_entity_client.update.assert_not_called() + + +@pytest.mark.asyncio +async def test_delete_prompt_success(prompt_service, mock_entity_client, sample_prompt_entity): + """Test deleting an existing prompt returns True.""" + mock_entity_client.get.return_value = sample_prompt_entity + + result = await prompt_service.delete_prompt(DeletePromptRequest(workspace="default", name="summarizer")) + + assert result is True + mock_entity_client.delete.assert_called_once() + + +@pytest.mark.asyncio +async def test_delete_prompt_not_found(prompt_service, mock_entity_client): + """Test that deleting a missing prompt returns False and does not call delete.""" + mock_entity_client.get.side_effect = EntityNotFoundError("not found") + + result = await prompt_service.delete_prompt(DeletePromptRequest(workspace="default", name="missing")) + + assert result is False + mock_entity_client.delete.assert_not_called() From 92c7b1048ecc83fa97bad8f0df1710d4a8302aa1 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Tue, 9 Jun 2026 15:29:13 -0700 Subject: [PATCH 02/10] Potential fix for pull request finding 'CodeQL / Log Injection' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Signed-off-by: Sean Teramae --- services/core/models/src/nmp/core/models/api/v2/prompts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py index 84cbde3e73..0419c0b525 100644 --- a/services/core/models/src/nmp/core/models/api/v2/prompts.py +++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py @@ -60,7 +60,8 @@ async def list_prompts( filter_operation=parsed_filter.operation, ) except Exception as e: - logger.exception(f"Failed to list prompts for workspace {workspace}") + safe_workspace = str(workspace).replace("\r", "").replace("\n", "") + logger.exception(f"Failed to list prompts for workspace {safe_workspace}") raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) From 0efe1d6ef7264f3734c3e9d30fb3cab6b05b2c06 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Tue, 9 Jun 2026 15:29:22 -0700 Subject: [PATCH 03/10] Potential fix for pull request finding 'CodeQL / Log Injection' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Signed-off-by: Sean Teramae --- .../models/src/nmp/core/models/api/v2/prompts.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py index 0419c0b525..70bd2433a9 100644 --- a/services/core/models/src/nmp/core/models/api/v2/prompts.py +++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py @@ -22,6 +22,12 @@ logger = logging.getLogger(__name__) + +def _sanitize_for_log(value: object) -> str: + """Prevent log injection by removing line-break/control characters.""" + return str(value).replace("\r", "").replace("\n", "") + + router = APIRouter() @@ -77,7 +83,9 @@ async def create_prompt( service: PromptService = Depends(get_prompt_service), ) -> Prompt: """Create a new prompt.""" - logger.info(f"Creating prompt: {workspace}/{request.name}") + safe_workspace = _sanitize_for_log(workspace) + safe_request_name = _sanitize_for_log(request.name) + logger.info(f"Creating prompt: {safe_workspace}/{safe_request_name}") try: return await service.create_prompt(request, workspace) except EntityValidationError as e: @@ -85,7 +93,7 @@ async def create_prompt( raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) except ValueError as e: if "already exists" in str(e).lower(): - logger.warning(f"Prompt already exists: {workspace}/{request.name}") + logger.warning(f"Prompt already exists: {safe_workspace}/{safe_request_name}") raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail=f"Prompt with workspace '{workspace}' and name '{request.name}' already exists", From bf6ffb610b8dc9349162d081fe1feee89c2872b0 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Tue, 9 Jun 2026 15:37:21 -0700 Subject: [PATCH 04/10] fix(models): complete log-injection sanitization for prompt API The CodeQL autofix only patched list_prompts and create_prompt, leaving get_prompt, update_prompt, and delete_prompt still interpolating raw user-controlled workspace/name into log messages (alerts 4168-4174). Sanitize the remaining log calls via _sanitize_for_log, and switch list_prompts to use the shared helper instead of an inlined replace for consistency. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Sean Teramae --- .../src/nmp/core/models/api/v2/prompts.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py index 70bd2433a9..9f01a2d1d2 100644 --- a/services/core/models/src/nmp/core/models/api/v2/prompts.py +++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py @@ -66,8 +66,7 @@ async def list_prompts( filter_operation=parsed_filter.operation, ) except Exception as e: - safe_workspace = str(workspace).replace("\r", "").replace("\n", "") - logger.exception(f"Failed to list prompts for workspace {safe_workspace}") + logger.exception(f"Failed to list prompts for workspace {_sanitize_for_log(workspace)}") raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -117,11 +116,11 @@ async def get_prompt( service: PromptService = Depends(get_prompt_service), ) -> Prompt: """Get a prompt by workspace and name.""" - logger.debug(f"Getting prompt: {workspace}/{name}") + logger.debug(f"Getting prompt: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") try: prompt = await service.get_prompt(GetPromptRequest(workspace=workspace, name=name)) if not prompt: - logger.warning(f"Prompt not found: {workspace}/{name}") + logger.warning(f"Prompt not found: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"Prompt not found: {workspace}/{name}", @@ -130,7 +129,7 @@ async def get_prompt( except HTTPException: raise except Exception as e: - logger.exception(f"Failed to get prompt {workspace}/{name}") + logger.exception(f"Failed to get prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -147,7 +146,7 @@ async def update_prompt( service: PromptService = Depends(get_prompt_service), ) -> Prompt: """Update an existing prompt (full replacement of mutable fields).""" - logger.debug(f"Updating prompt: {workspace}/{name}") + logger.debug(f"Updating prompt: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") try: prompt = await service.update_prompt(workspace, name, request) if not prompt: @@ -178,11 +177,11 @@ async def delete_prompt( service: PromptService = Depends(get_prompt_service), ): """Delete a prompt by workspace and name.""" - logger.info(f"Deleting prompt: {workspace}/{name}") + logger.info(f"Deleting prompt: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") try: deleted = await service.delete_prompt(DeletePromptRequest(workspace=workspace, name=name)) if not deleted: - logger.warning(f"Prompt not found for deletion: {workspace}/{name}") + logger.warning(f"Prompt not found for deletion: {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"Prompt not found: {workspace}/{name}", @@ -191,5 +190,5 @@ async def delete_prompt( except HTTPException: raise except Exception as e: - logger.exception(f"Failed to delete prompt {workspace}/{name}") + logger.exception(f"Failed to delete prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) From 85c27281f9d580dddc492b3e5a16784b9c37380f Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Tue, 9 Jun 2026 16:02:30 -0700 Subject: [PATCH 05/10] fix(models): address PR review findings in prompt API - Prevent workspace filter from overriding the path-scoped workspace in list_prompts (cross-workspace read vector) - Replace detail=str(e) with generic message in all 500 handlers to avoid leaking backend internals to clients - Fix PUT full-replacement: tags was conditionally skipped; now always replaced (entity.tags = request.tags or []) - Add ge=1/le=1000 bounds on page/page_size Query params - Make ChatCompletionTool.type required (no default) so the generated OpenAPI schema marks it as required per OpenAI spec - Fix validation error handler to return "Invalid prompt data" instead of the raw exception string - Add tests: workspace scope isolation, page/page_size bounds, tags cleared on omission Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Sean Teramae --- .../core/models/api/service/prompt_service.py | 3 +- .../src/nmp/core/models/api/v2/prompts.py | 32 +++++++++---------- .../models/src/nmp/core/models/schemas.py | 1 - .../models/tests/unit/api/test_prompts_api.py | 19 +++++++++++ .../tests/unit/test_prompt_service_unit.py | 18 ++++++++++- 5 files changed, 53 insertions(+), 20 deletions(-) diff --git a/services/core/models/src/nmp/core/models/api/service/prompt_service.py b/services/core/models/src/nmp/core/models/api/service/prompt_service.py index 4754f91efb..9d068be2dd 100644 --- a/services/core/models/src/nmp/core/models/api/service/prompt_service.py +++ b/services/core/models/src/nmp/core/models/api/service/prompt_service.py @@ -150,8 +150,7 @@ async def update_prompt(self, workspace: str, name: str, request: UpdatePromptRe entity.tool_choice = request.tool_choice entity.response_format = request.response_format entity.inference_params = request.inference_params - if request.tags is not None: - entity.tags = request.tags + entity.tags = request.tags or [] updated = await self.entity_client.update(entity) logger.info("Prompt updated", extra={"workspace": updated.workspace, "prompt_name": updated.name}) diff --git a/services/core/models/src/nmp/core/models/api/v2/prompts.py b/services/core/models/src/nmp/core/models/api/v2/prompts.py index 9f01a2d1d2..86dd87cdbc 100644 --- a/services/core/models/src/nmp/core/models/api/v2/prompts.py +++ b/services/core/models/src/nmp/core/models/api/v2/prompts.py @@ -45,8 +45,8 @@ def _sanitize_for_log(value: object) -> str: ) async def list_prompts( workspace: str, - page: int = Query(default=1, description="Page number."), - page_size: int = Query(default=100, description="Page size."), + page: int = Query(default=1, ge=1, description="Page number."), + page_size: int = Query(default=100, ge=1, le=1000, description="Page size."), sort: PromptSort = Query( default=PromptSort.CREATED_AT_ASC, description="The field to sort by. To sort in decreasing order, use `-` in front of the field name.", @@ -55,19 +55,19 @@ async def list_prompts( service: PromptService = Depends(get_prompt_service), ) -> Page[Prompt]: """List prompts for a specific workspace.""" - # Extract workspace — inject from path param if not in filter - filter_workspace = parsed_filter.remove("workspace") or workspace + # Discard any workspace override in the filter — always scope to the path workspace. + parsed_filter.remove("workspace") try: return await service.list_prompts( - workspace=filter_workspace, + workspace=workspace, page=page, page_size=page_size, sort=sort, filter_operation=parsed_filter.operation, ) - except Exception as e: + except Exception: logger.exception(f"Failed to list prompts for workspace {_sanitize_for_log(workspace)}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error") @router.post( @@ -98,10 +98,10 @@ async def create_prompt( detail=f"Prompt with workspace '{workspace}' and name '{request.name}' already exists", ) logger.warning(f"Prompt creation validation error: {e}") - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) - except Exception as e: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid prompt data") + except Exception: logger.exception("Failed to create prompt") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error") @router.get( @@ -128,9 +128,9 @@ async def get_prompt( return prompt except HTTPException: raise - except Exception as e: + except Exception: logger.exception(f"Failed to get prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error") @router.put( @@ -160,9 +160,9 @@ async def update_prompt( raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)) except HTTPException: raise - except Exception as e: + except Exception: logger.exception("Failed to update prompt") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error") @router.delete( @@ -189,6 +189,6 @@ async def delete_prompt( return None except HTTPException: raise - except Exception as e: + except Exception: logger.exception(f"Failed to delete prompt {_sanitize_for_log(workspace)}/{_sanitize_for_log(name)}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal server error") diff --git a/services/core/models/src/nmp/core/models/schemas.py b/services/core/models/src/nmp/core/models/schemas.py index 47e060ba49..546de3570b 100644 --- a/services/core/models/src/nmp/core/models/schemas.py +++ b/services/core/models/src/nmp/core/models/schemas.py @@ -757,7 +757,6 @@ class ChatCompletionTool(BaseModel): """An OpenAI-compatible tool definition (currently always a function tool).""" type: Literal["function"] = Field( - default="function", description="The type of the tool. Currently only 'function' is supported.", ) function: FunctionDefinition = Field(description="The function definition for this tool.") diff --git a/services/core/models/tests/unit/api/test_prompts_api.py b/services/core/models/tests/unit/api/test_prompts_api.py index fd30eabba8..da8f30cd8a 100644 --- a/services/core/models/tests/unit/api/test_prompts_api.py +++ b/services/core/models/tests/unit/api/test_prompts_api.py @@ -105,6 +105,25 @@ def test_list_prompts_with_name_filter(client, mock_prompt_service, sample_page) assert mock_prompt_service.list_prompts.call_args.kwargs.get("filter_operation") is not None +def test_list_prompts_workspace_filter_cannot_override_path(client, mock_prompt_service, sample_page): + mock_prompt_service.list_prompts.return_value = sample_page + + response = client.get("/apis/models/v2/workspaces/default/prompts?filter[workspace][]=other") + + assert response.status_code == 200 + assert mock_prompt_service.list_prompts.call_args.kwargs["workspace"] == "default" + + +def test_list_prompts_invalid_page_returns_422(client): + response = client.get("/apis/models/v2/workspaces/default/prompts?page=0") + assert response.status_code == 422 + + +def test_list_prompts_invalid_page_size_returns_422(client): + response = client.get("/apis/models/v2/workspaces/default/prompts?page_size=0") + assert response.status_code == 422 + + def test_list_prompts_response_structure(client, mock_prompt_service, sample_page): mock_prompt_service.list_prompts.return_value = sample_page diff --git a/services/core/models/tests/unit/test_prompt_service_unit.py b/services/core/models/tests/unit/test_prompt_service_unit.py index 68531fe3b8..37b6415d31 100644 --- a/services/core/models/tests/unit/test_prompt_service_unit.py +++ b/services/core/models/tests/unit/test_prompt_service_unit.py @@ -62,11 +62,12 @@ def sample_messages() -> list[PromptMessage]: def sample_tools() -> list[ChatCompletionTool]: return [ ChatCompletionTool( + type="function", function=FunctionDefinition( name="get_weather", description="Get the current weather for a city.", parameters={"type": "object", "properties": {"city": {"type": "string"}}}, - ) + ), ) ] @@ -198,6 +199,21 @@ async def test_update_prompt_success(prompt_service, mock_entity_client, sample_ assert updated_entity.tool_choice is None +@pytest.mark.asyncio +async def test_update_prompt_clears_tags_when_omitted(prompt_service, mock_entity_client, sample_prompt_entity): + """Test that omitting tags in an update replaces them with an empty list (full replacement).""" + sample_prompt_entity.tags = ["old-tag"] + mock_entity_client.get.return_value = sample_prompt_entity + mock_entity_client.update.return_value = sample_prompt_entity + + request = UpdatePromptRequest(description="no tags") + + await prompt_service.update_prompt("default", "summarizer", request) + + updated_entity = mock_entity_client.update.call_args[0][0] + assert updated_entity.tags == [] + + @pytest.mark.asyncio async def test_update_prompt_not_found(prompt_service, mock_entity_client): """Test that updating a missing prompt returns None.""" From e79c921a7fd5894c45770d1771e7024da96c59e3 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Tue, 9 Jun 2026 16:19:40 -0700 Subject: [PATCH 06/10] run lint fix Signed-off-by: Sean Teramae --- openapi/ga/individual/platform.openapi.yaml | 5 ++- openapi/ga/openapi.yaml | 5 ++- openapi/openapi.yaml | 5 ++- sdk/stainless.yaml | 1 + .../nmp/core/auth/assets/static-authz.yaml | 36 +++++++++++++++++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/openapi/ga/individual/platform.openapi.yaml b/openapi/ga/individual/platform.openapi.yaml index 96e7ade804..8475d741a0 100644 --- a/openapi/ga/individual/platform.openapi.yaml +++ b/openapi/ga/individual/platform.openapi.yaml @@ -6598,6 +6598,7 @@ paths: required: false schema: type: integer + minimum: 1 description: Page number. default: 1 title: Page @@ -6607,6 +6608,8 @@ paths: required: false schema: type: integer + maximum: 1000 + minimum: 1 description: Page size. default: 100 title: Page Size @@ -8407,13 +8410,13 @@ components: const: function title: Type description: The type of the tool. Currently only 'function' is supported. - default: function function: allOf: - $ref: '#/components/schemas/FunctionDefinition' description: The function definition for this tool. type: object required: + - type - function title: ChatCompletionTool description: An OpenAI-compatible tool definition (currently always a function diff --git a/openapi/ga/openapi.yaml b/openapi/ga/openapi.yaml index 96e7ade804..8475d741a0 100644 --- a/openapi/ga/openapi.yaml +++ b/openapi/ga/openapi.yaml @@ -6598,6 +6598,7 @@ paths: required: false schema: type: integer + minimum: 1 description: Page number. default: 1 title: Page @@ -6607,6 +6608,8 @@ paths: required: false schema: type: integer + maximum: 1000 + minimum: 1 description: Page size. default: 100 title: Page Size @@ -8407,13 +8410,13 @@ components: const: function title: Type description: The type of the tool. Currently only 'function' is supported. - default: function function: allOf: - $ref: '#/components/schemas/FunctionDefinition' description: The function definition for this tool. type: object required: + - type - function title: ChatCompletionTool description: An OpenAI-compatible tool definition (currently always a function diff --git a/openapi/openapi.yaml b/openapi/openapi.yaml index 96e7ade804..8475d741a0 100644 --- a/openapi/openapi.yaml +++ b/openapi/openapi.yaml @@ -6598,6 +6598,7 @@ paths: required: false schema: type: integer + minimum: 1 description: Page number. default: 1 title: Page @@ -6607,6 +6608,8 @@ paths: required: false schema: type: integer + maximum: 1000 + minimum: 1 description: Page size. default: 100 title: Page Size @@ -8407,13 +8410,13 @@ components: const: function title: Type description: The type of the tool. Currently only 'function' is supported. - default: function function: allOf: - $ref: '#/components/schemas/FunctionDefinition' description: The function definition for this tool. type: object required: + - type - function title: ChatCompletionTool description: An OpenAI-compatible tool definition (currently always a function diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml index e000ff3ae4..c695227c70 100644 --- a/sdk/stainless.yaml +++ b/sdk/stainless.yaml @@ -721,6 +721,7 @@ resources: tool_calling_metadata_content: ToolCallingMetadataContent backend_format: BackendFormat finetuning_type: FinetuningType + inference_params: InferenceParams iam: standalone_api: true subresources: diff --git a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml index be49cf053d..13d6dd6da8 100644 --- a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml +++ b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml @@ -298,6 +298,7 @@ authz: - models.adapters.list - models.adapters.read - models.list + - models.prompts.read - models.read - projects.list - projects.read @@ -349,6 +350,9 @@ authz: - models.adapters.update - models.create - models.delete + - models.prompts.create + - models.prompts.delete + - models.prompts.update - models.update - projects.create - projects.delete @@ -1338,6 +1342,38 @@ authz: scopes: - models:write - platform:write + /apis/models/v2/workspaces/{workspace}/prompts: + get: + permissions: + - models.prompts.read + scopes: + - models:read + - platform:read + post: + permissions: + - models.prompts.create + scopes: + - models:write + - platform:write + /apis/models/v2/workspaces/{workspace}/prompts/{name}: + delete: + permissions: + - models.prompts.delete + scopes: + - models:write + - platform:write + get: + permissions: + - models.prompts.read + scopes: + - models:read + - platform:read + put: + permissions: + - models.prompts.update + scopes: + - models:write + - platform:write /apis/models/v2/workspaces/{workspace}/providers: get: permissions: From e0e291b36abe10cf4f5fd1c87698a45d21a9aa70 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Thu, 11 Jun 2026 17:17:48 -0700 Subject: [PATCH 07/10] fix stainless Signed-off-by: Sean Teramae --- .../nemo-platform/.nmpcontext/openapi.yaml | 563 +++++++++++++ .../nemo-platform/.nmpcontext/stainless.yaml | 190 +++-- sdk/python/nemo-platform/api.md | 1 - .../src/nemo_platform/resources/files/api.md | 2 +- .../nemo_platform/resources/files/filesets.py | 11 +- .../nemo_platform/resources/guardrail/api.md | 5 + .../resources/inference/__init__.py | 14 + .../nemo_platform/resources/inference/api.md | 27 + .../resources/inference/inference.py | 32 + .../resources/inference/prompts.py | 743 ++++++++++++++++++ .../src/nemo_platform/resources/jobs/api.md | 5 + .../src/nemo_platform/resources/jobs/jobs.py | 1 + .../src/nemo_platform/types/__init__.py | 1 - .../src/nemo_platform/types/files/__init__.py | 2 + .../src/nemo_platform/types/files/fileset.py | 2 +- .../types/files/fileset_create_params.py | 4 +- .../{shared => files}/fileset_metadata.py | 4 +- .../fileset_metadata_param.py} | 8 +- .../types/files/fileset_update_params.py | 4 +- .../nemo_platform/types/inference/__init__.py | 14 + .../types/inference/chat_completion_tool.py | 37 + .../inference/chat_completion_tool_param.py | 38 + .../types/inference/function_definition.py | 45 ++ .../inference/function_definition_param.py | 46 ++ .../nemo_platform/types/inference/prompt.py | 96 +++ .../types/inference/prompt_create_params.py | 64 ++ .../types/inference/prompt_filter_param.py | 46 ++ .../types/inference/prompt_list_params.py | 47 ++ .../types/inference/prompt_message.py | 39 + .../types/inference/prompt_message_param.py | 42 + .../types/inference/prompt_message_role.py | 22 + .../types/inference/prompt_sort.py | 22 + .../types/inference/prompt_update_params.py | 57 ++ .../types/inference/prompts_page.py | 37 + .../nemo_platform/types/shared/__init__.py | 1 - .../types/shared_params/__init__.py | 1 - .../api_resources/inference/test_prompts.py | 741 +++++++++++++++++ sdk/stainless.yaml | 173 ++-- 38 files changed, 2993 insertions(+), 194 deletions(-) create mode 100644 sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py rename sdk/python/nemo-platform/src/nemo_platform/types/{shared => files}/fileset_metadata.py (91%) rename sdk/python/nemo-platform/src/nemo_platform/types/{shared_params/fileset_metadata.py => files/fileset_metadata_param.py} (85%) create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py create mode 100644 sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py diff --git a/sdk/python/nemo-platform/.nmpcontext/openapi.yaml b/sdk/python/nemo-platform/.nmpcontext/openapi.yaml index dba0e62e51..9044b2eaef 100644 --- a/sdk/python/nemo-platform/.nmpcontext/openapi.yaml +++ b/sdk/python/nemo-platform/.nmpcontext/openapi.yaml @@ -6661,6 +6661,205 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts: + get: + tags: + - Prompts + summary: List Prompts By Workspace + description: List prompts for a specific workspace. + operationId: list_prompts_apis_models_v2_workspaces__workspace__prompts_get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: page + in: query + required: false + schema: + type: integer + minimum: 1 + description: Page number. + default: 1 + title: Page + description: Page number. + - name: page_size + in: query + required: false + schema: + type: integer + maximum: 1000 + minimum: 1 + description: Page size. + default: 100 + title: Page Size + description: Page size. + - name: sort + in: query + required: false + schema: + allOf: + - $ref: '#/components/schemas/PromptSort' + description: The field to sort by. To sort in decreasing order, use `-` + in front of the field name. + default: created_at + description: The field to sort by. To sort in decreasing order, use `-` in + front of the field name. + - in: query + name: filter + style: deepObject + required: false + explode: true + schema: + $ref: '#/components/schemas/PromptFilter' + description: Filter prompts by workspace, project, name, description, created_at, + and updated_at. + responses: + '200': + description: Return prompts for a workspace + content: + application/json: + schema: + $ref: '#/components/schemas/PromptsPage' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + post: + tags: + - Prompts + summary: Create Prompt + description: Create a new prompt. + operationId: create_prompt_apis_models_v2_workspaces__workspace__prompts_post + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePromptRequest' + responses: + '201': + description: Create a new prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /apis/models/v2/workspaces/{workspace}/prompts/{name}: + get: + tags: + - Prompts + summary: Get Prompt + description: Get a prompt by workspace and name. + operationId: get_prompt_apis_models_v2_workspaces__workspace__prompts__name__get + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '200': + description: Return prompt details + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + put: + tags: + - Prompts + summary: Update Prompt + description: Update an existing prompt (full replacement of mutable fields). + operationId: update_prompt_apis_models_v2_workspaces__workspace__prompts__name__put + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdatePromptRequest' + responses: + '200': + description: Update an existing prompt + content: + application/json: + schema: + $ref: '#/components/schemas/Prompt' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + delete: + tags: + - Prompts + summary: Delete Prompt + description: Delete a prompt by workspace and name. + operationId: delete_prompt_apis_models_v2_workspaces__workspace__prompts__name__delete + parameters: + - name: workspace + in: path + required: true + schema: + type: string + title: Workspace + - name: name + in: path + required: true + schema: + type: string + title: Name + responses: + '204': + description: Delete a prompt + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' /apis/models/v2/workspaces/{workspace}/providers: get: tags: @@ -8294,6 +8493,24 @@ components: - role title: ChatCompletionSystemMessageParam description: System message parameter for chat completion. + ChatCompletionTool: + properties: + type: + type: string + const: function + title: Type + description: The type of the tool. Currently only 'function' is supported. + function: + allOf: + - $ref: '#/components/schemas/FunctionDefinition' + description: The function definition for this tool. + type: object + required: + - type + - function + title: ChatCompletionTool + description: An OpenAI-compatible tool definition (currently always a function + tool). ChatCompletionToolMessageParam: properties: content: @@ -9083,6 +9300,65 @@ components: - source title: CreatePlatformJobRequest description: Request model for creating a new platform job. + CreatePromptRequest: + properties: + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the prompt. Allowed characters: letters (a-z, A-Z), + digits (0-9), underscores, hyphens, and dots.' + examples: + - support-bot-system + - summarizer + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + required: + - name + title: CreatePromptRequest + description: Request model for creating a Prompt. CreateVirtualModelRequest: properties: default_model_entity: @@ -10716,6 +10992,39 @@ components: - name title: FunctionCall description: Function call information. + FunctionDefinition: + properties: + name: + type: string + maxLength: 255 + title: Name + description: The name of the function to be called. + description: + title: Description + description: A description of what the function does, used by the model + to decide when and how to call it. + type: string + parameters: + title: Parameters + description: The parameters the function accepts, described as a JSON Schema + object. + additionalProperties: true + type: object + strict: + title: Strict + description: Whether to enforce strict schema adherence when generating + the function call. + type: boolean + type: object + required: + - name + title: FunctionDefinition + description: 'An OpenAI-compatible function definition for tool calling. + + + Mirrors the ``function`` object the Inference Gateway forwards to + + OpenAI-compatible backends.' GLiNERDetection: properties: server_endpoint: @@ -15269,6 +15578,110 @@ components: required: - data title: ProjectsPage + Prompt: + properties: + id: + type: string + title: Id + description: Unique identifier for the prompt. + name: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Name + description: 'Name of the entity. Name/workspace combo must be unique across + all entities. Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, + hyphens, and dots.' + examples: + - llama-3.1-8b + - my-custom-model + workspace: + type: string + maxLength: 255 + pattern: ^[\w\-.]+$ + title: Workspace + description: 'The workspace of the entity. Allowed characters: letters (a-z, + A-Z), digits (0-9), underscores, hyphens, and dots.' + project: + title: Project + description: The URN of the project associated with this entity. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + created_at: + type: string + format: date-time + title: Created At + description: The timestamp of model entity creation + updated_at: + type: string + format: date-time + title: Updated At + description: The timestamp of the last model entity update + description: + title: Description + description: Optional description of the prompt. + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + description: Ordered list of chat messages that make up the prompt. + input_variables: + items: + type: string + type: array + title: Input Variables + description: Names of the Jinja2 template variables the prompt expects. + tools: + title: Tools + description: Optional OpenAI-compatible tool definitions to send with the + prompt. + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + description: 'Controls which (if any) tool is called: ''none'', ''auto'', + ''required'', or a named-tool object.' + response_format: + title: Response Format + description: Optional OpenAI-compatible response_format, e.g. a json_schema + structured-output spec. + additionalProperties: true + type: object + inference_params: + allOf: + - $ref: '#/components/schemas/InferenceParams' + description: Optional default model and sampling parameters (temperature, + top_p, max_tokens, ...). + tags: + items: + type: string + type: array + title: Tags + description: Optional free-form tags for organizing prompts. + type: object + required: + - name + - workspace + - created_at + - updated_at + title: Prompt + description: 'A reusable, stored chat prompt. + + + A Prompt captures the messages, declared template variables, optional tool + + definitions, and default inference parameters needed to invoke a model + + through the Inference Gateway. The unique identifier is workspace/name.' PromptData: properties: system_prompt: @@ -15291,6 +15704,106 @@ components: type: object title: PromptData description: Configuration for prompt engineering. + PromptFilter: + additionalProperties: false + description: Filter for Prompt queries. + properties: + workspace: + description: Filter by workspace. + title: Workspace + type: string + project: + description: Filter by project URN. + title: Project + type: string + name: + description: Filter by name. + title: Name + type: string + description: + description: Filter by description. + title: Description + type: string + created_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by creation date. + updated_at: + allOf: + - $ref: '#/components/schemas/DatetimeFilter' + description: Filter by update date. + title: PromptFilter + type: object + PromptMessage: + properties: + role: + allOf: + - $ref: '#/components/schemas/PromptMessageRole' + description: The role of the message author. + content: + type: string + title: Content + description: Templated message content. May contain template variables. + type: object + required: + - role + - content + title: PromptMessage + description: 'A single templated message in a chat prompt. + + + ``content`` is a Jinja2 template body that may reference the prompt''s + + declared ``input_variables`` (e.g. ``{{ topic }}``).' + PromptMessageRole: + type: string + enum: + - system + - developer + - user + - assistant + title: PromptMessageRole + description: 'Role of a message author in a chat prompt. + + + Follows the OpenAI chat schema the Inference Gateway speaks + + (``/v1/chat/completions``).' + PromptSort: + type: string + enum: + - name + - -name + - created_at + - -created_at + - updated_at + - -updated_at + title: PromptSort + description: Sort fields for Prompt queries. + PromptsPage: + properties: + data: + items: + $ref: '#/components/schemas/Prompt' + type: array + title: Data + pagination: + allOf: + - $ref: '#/components/schemas/PaginationData' + description: Pagination information. + sort: + title: Sort + description: The field on which the results are sorted. + type: string + filter: + title: Filter + description: Filtering information. + additionalProperties: true + type: object + type: object + required: + - data + title: PromptsPage RailStatus: properties: status: @@ -17159,6 +17672,56 @@ components: This endpoint supports partial updates for fields managed by Models Controller.' + UpdatePromptRequest: + properties: + project: + title: Project + description: The URN of the project associated with this prompt. + type: string + maxLength: 255 + pattern: ^[\w\-./]+$ + description: + title: Description + type: string + maxLength: 1000 + messages: + items: + $ref: '#/components/schemas/PromptMessage' + type: array + title: Messages + input_variables: + items: + type: string + type: array + title: Input Variables + tools: + title: Tools + items: + $ref: '#/components/schemas/ChatCompletionTool' + type: array + tool_choice: + anyOf: + - type: string + - additionalProperties: true + type: object + title: Tool Choice + response_format: + title: Response Format + additionalProperties: true + type: object + inference_params: + $ref: '#/components/schemas/InferenceParams' + tags: + title: Tags + items: + type: string + type: array + type: object + title: UpdatePromptRequest + description: 'Request model for replacing a Prompt''s mutable fields (full update). + + + The prompt name and workspace come from the URL path and cannot be changed.' UpdateVirtualModelRequest: properties: default_model_entity: diff --git a/sdk/python/nemo-platform/.nmpcontext/stainless.yaml b/sdk/python/nemo-platform/.nmpcontext/stainless.yaml index 7d5a9da0e6..e96b34b9e4 100644 --- a/sdk/python/nemo-platform/.nmpcontext/stainless.yaml +++ b/sdk/python/nemo-platform/.nmpcontext/stainless.yaml @@ -26,16 +26,16 @@ custom_casings: initialism: true targets: -# typescript: -# _skip_running_tests: false -# package_name: nemo-platform-v1 -# publish: -# npm: false -# skip: false -# options: -# mcp_server: -# package_name: nemo-platform-v1-mcp -# enable_all_resources: true + # typescript: + # _skip_running_tests: false + # package_name: nemo-platform-v1 + # publish: + # npm: false + # skip: false + # options: + # mcp_server: + # package_name: nemo-platform-v1-mcp + # enable_all_resources: true python: # [docs]: https://www.stainless.com/docs/reference/editions @@ -81,86 +81,86 @@ client_settings: # `pagination` defines [pagination schemes] which provides a template to match # endpoints and generate next-page and auto-pagination helpers in the SDKs. pagination: -- name: default_pagination - type: page_number - request: - page: - type: integer - x-stainless-pagination-property: - purpose: page_number_param - page_size: - type: integer - response: - data: - type: array - x-stainless-pagination-property: - purpose: items - items: + - name: default_pagination + type: page_number + request: + page: + type: integer + x-stainless-pagination-property: + purpose: page_number_param + page_size: + type: integer + response: + data: + type: array + x-stainless-pagination-property: + purpose: items + items: + type: object + additionalProperties: true + pagination: type: object - additionalProperties: true - pagination: - type: object - properties: - page: - type: integer - title: Page - description: The current page number. - x-stainless-pagination-property: - purpose: current_page_number_field - page_size: - type: integer - title: Page Size - description: The page size used for the query. - current_page_size: - type: integer - title: Current Page Size - description: The size for the current page. - total_pages: - type: integer - title: Total Pages - description: The total number of pages. - x-stainless-pagination-property: - purpose: total_page_count_field - total_results: - type: integer - title: Total Results - description: The total number of results. - required: - - page - - page_size - - total_pages - - total_results - - current_page_size -- name: logs_pagination - type: cursor - request: - limit: - type: integer - page_cursor: - type: string - x-stainless-pagination-property: - purpose: next_cursor_param - response: - data: - type: array - x-stainless-pagination-property: - purpose: items - items: - type: object - additionalProperties: true - next_page: - type: string - x-stainless-pagination-property: - purpose: next_cursor_field + properties: + page: + type: integer + title: Page + description: The current page number. + x-stainless-pagination-property: + purpose: current_page_number_field + page_size: + type: integer + title: Page Size + description: The page size used for the query. + current_page_size: + type: integer + title: Current Page Size + description: The size for the current page. + total_pages: + type: integer + title: Total Pages + description: The total number of pages. + x-stainless-pagination-property: + purpose: total_page_count_field + total_results: + type: integer + title: Total Results + description: The total number of results. + required: + - page + - page_size + - total_pages + - total_results + - current_page_size + - name: logs_pagination + type: cursor + request: + limit: + type: integer + page_cursor: + type: string + x-stainless-pagination-property: + purpose: next_cursor_param + response: + data: + type: array + x-stainless-pagination-property: + purpose: items + items: + type: object + additionalProperties: true + next_page: + type: string + x-stainless-pagination-property: + purpose: next_cursor_field streaming: on_event: - - data_starts_with: "[DONE]" - handle: done - - event_type: error - handle: error - - event_type: - handle: yield + - data_starts_with: "[DONE]" + handle: done + - event_type: error + handle: error + - event_type: + handle: yield readme: example_requests: @@ -464,6 +464,24 @@ resources: update: put /apis/models/v2/workspaces/{workspace}/providers/{name} delete: delete /apis/models/v2/workspaces/{workspace}/providers/{name} update_status: put /apis/models/v2/workspaces/{workspace}/providers/{name}/status + prompts: + models: + chat_completion_tool: ChatCompletionTool + create_prompt_request: CreatePromptRequest + function_definition: FunctionDefinition + prompt: Prompt + prompt_filter: PromptFilter + prompt_message: PromptMessage + prompt_message_role: PromptMessageRole + prompt_sort: PromptSort + prompts_page: PromptsPage + update_prompt_request: UpdatePromptRequest + methods: + list: get /apis/models/v2/workspaces/{workspace}/prompts + create: post /apis/models/v2/workspaces/{workspace}/prompts + retrieve: get /apis/models/v2/workspaces/{workspace}/prompts/{name} + update: put /apis/models/v2/workspaces/{workspace}/prompts/{name} + delete: delete /apis/models/v2/workspaces/{workspace}/prompts/{name} gateway: subresources: openai: diff --git a/sdk/python/nemo-platform/api.md b/sdk/python/nemo-platform/api.md index 271d51d2df..a0e07c72cd 100644 --- a/sdk/python/nemo-platform/api.md +++ b/sdk/python/nemo-platform/api.md @@ -10,7 +10,6 @@ from nemo_platform.types import ( DatetimeFilter, DeleteResponse, FileStorageType, - FilesetMetadata, FinetuningType, GenericSortField, HTTPValidationError, diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md index 72e7b5ca66..882f649add 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md @@ -33,7 +33,7 @@ Methods: Types: ```python -from nemo_platform.types.files import FilesetFilter +from nemo_platform.types.files import FilesetFilter, FilesetMetadata, FilesetMetadataParam ``` Methods: diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py b/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py index f8fb167cf2..018acd45b4 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py @@ -34,6 +34,7 @@ from ...pagination import SyncDefaultPagination, AsyncDefaultPagination from ...types.files import ( FilesetPurpose, + FilesetMetadataParam, fileset_list_params, fileset_create_params, fileset_update_params, @@ -43,7 +44,7 @@ from ...types.files.fileset_purpose import FilesetPurpose from ...types.shared.generic_sort_field import GenericSortField from ...types.files.fileset_filter_param import FilesetFilterParam -from ...types.shared_params.fileset_metadata import FilesetMetadata +from ...types.files.fileset_metadata_param import FilesetMetadataParam from ..._exceptions import ConflictError __all__ = ["FilesetsResource", "AsyncFilesetsResource"] @@ -77,7 +78,7 @@ def create( cache: bool | Omit = omit, custom_fields: Dict[str, object] | Omit = omit, description: str | Omit = omit, - metadata: FilesetMetadata | Omit = omit, + metadata: FilesetMetadataParam | Omit = omit, project: str | Omit = omit, purpose: FilesetPurpose | Omit = omit, storage: fileset_create_params.Storage | Omit = omit, @@ -206,7 +207,7 @@ def update( workspace: str | None = None, custom_fields: Dict[str, object] | Omit = omit, description: str | Omit = omit, - metadata: FilesetMetadata | Omit = omit, + metadata: FilesetMetadataParam | Omit = omit, project: str | Omit = omit, purpose: FilesetPurpose | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -402,7 +403,7 @@ async def create( cache: bool | Omit = omit, custom_fields: Dict[str, object] | Omit = omit, description: str | Omit = omit, - metadata: FilesetMetadata | Omit = omit, + metadata: FilesetMetadataParam | Omit = omit, project: str | Omit = omit, purpose: FilesetPurpose | Omit = omit, storage: fileset_create_params.Storage | Omit = omit, @@ -531,7 +532,7 @@ async def update( workspace: str | None = None, custom_fields: Dict[str, object] | Omit = omit, description: str | Omit = omit, - metadata: FilesetMetadata | Omit = omit, + metadata: FilesetMetadataParam | Omit = omit, project: str | Omit = omit, purpose: FilesetPurpose | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md index 52c2cf31fd..4fe014901d 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/guardrail/api.md @@ -60,14 +60,19 @@ from nemo_platform.types.guardrail import ( PangeaRailOptions, PatronusEvaluateAPIParams, PatronusEvaluateConfig, + PatronusEvaluateConfigParam, PatronusEvaluationSuccessStrategy, PatronusRailConfig, + PatronusRailConfigParam, PrivateAIDetection, PrivateAIDetectionOptions, RailStatus, Rails, RailsConfig, RailsConfigData, + RailsConfigDataParam, + RailsConfigParam, + RailsParam, ReasoningConfig, RegexDetection, RegexDetectionOptions, diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py index 83bee2909a..59c3f75914 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/__init__.py @@ -31,6 +31,14 @@ GatewayResourceWithStreamingResponse, AsyncGatewayResourceWithStreamingResponse, ) +from .prompts import ( + PromptsResource, + AsyncPromptsResource, + PromptsResourceWithRawResponse, + AsyncPromptsResourceWithRawResponse, + PromptsResourceWithStreamingResponse, + AsyncPromptsResourceWithStreamingResponse, +) from .inference import ( InferenceResource, AsyncInferenceResource, @@ -103,6 +111,12 @@ "AsyncProvidersResourceWithRawResponse", "ProvidersResourceWithStreamingResponse", "AsyncProvidersResourceWithStreamingResponse", + "PromptsResource", + "AsyncPromptsResource", + "PromptsResourceWithRawResponse", + "AsyncPromptsResourceWithRawResponse", + "PromptsResourceWithStreamingResponse", + "AsyncPromptsResourceWithStreamingResponse", "GatewayResource", "AsyncGatewayResource", "GatewayResourceWithRawResponse", diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md index b8a3fa9f6d..dca686fe43 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/api.md @@ -146,6 +146,33 @@ Methods: - client.inference.providers.delete(name, \*, workspace) -> None - client.inference.providers.update_status(name, \*, workspace, \*\*params) -> ModelProvider +## Prompts + +Types: + +```python +from nemo_platform.types.inference import ( + ChatCompletionTool, + CreatePromptRequest, + FunctionDefinition, + Prompt, + PromptFilter, + PromptMessage, + PromptMessageRole, + PromptSort, + PromptsPage, + UpdatePromptRequest, +) +``` + +Methods: + +- client.inference.prompts.create(\*, workspace, \*\*params) -> Prompt +- client.inference.prompts.retrieve(name, \*, workspace) -> Prompt +- client.inference.prompts.update(name, \*, workspace, \*\*params) -> Prompt +- client.inference.prompts.list(\*, workspace, \*\*params) -> SyncDefaultPagination[Prompt] +- client.inference.prompts.delete(name, \*, workspace) -> None + ## Gateway ### OpenAI diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py index f9d80a024e..5abbe23ce3 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/inference.py @@ -25,6 +25,14 @@ ModelsResourceWithStreamingResponse, AsyncModelsResourceWithStreamingResponse, ) +from .prompts import ( + PromptsResource, + AsyncPromptsResource, + PromptsResourceWithRawResponse, + AsyncPromptsResourceWithRawResponse, + PromptsResourceWithStreamingResponse, + AsyncPromptsResourceWithStreamingResponse, +) from ..._compat import cached_property from .providers import ( ProvidersResource, @@ -92,6 +100,10 @@ def deployments(self) -> DeploymentsResource: def providers(self) -> ProvidersResource: return ProvidersResource(self._client) + @cached_property + def prompts(self) -> PromptsResource: + return PromptsResource(self._client) + @cached_property def gateway(self) -> GatewayResource: return GatewayResource(self._client) @@ -137,6 +149,10 @@ def deployments(self) -> AsyncDeploymentsResource: def providers(self) -> AsyncProvidersResource: return AsyncProvidersResource(self._client) + @cached_property + def prompts(self) -> AsyncPromptsResource: + return AsyncPromptsResource(self._client) + @cached_property def gateway(self) -> AsyncGatewayResource: return AsyncGatewayResource(self._client) @@ -185,6 +201,10 @@ def deployments(self) -> DeploymentsResourceWithRawResponse: def providers(self) -> ProvidersResourceWithRawResponse: return ProvidersResourceWithRawResponse(self._inference.providers) + @cached_property + def prompts(self) -> PromptsResourceWithRawResponse: + return PromptsResourceWithRawResponse(self._inference.prompts) + @cached_property def gateway(self) -> GatewayResourceWithRawResponse: return GatewayResourceWithRawResponse(self._inference.gateway) @@ -214,6 +234,10 @@ def deployments(self) -> AsyncDeploymentsResourceWithRawResponse: def providers(self) -> AsyncProvidersResourceWithRawResponse: return AsyncProvidersResourceWithRawResponse(self._inference.providers) + @cached_property + def prompts(self) -> AsyncPromptsResourceWithRawResponse: + return AsyncPromptsResourceWithRawResponse(self._inference.prompts) + @cached_property def gateway(self) -> AsyncGatewayResourceWithRawResponse: return AsyncGatewayResourceWithRawResponse(self._inference.gateway) @@ -243,6 +267,10 @@ def deployments(self) -> DeploymentsResourceWithStreamingResponse: def providers(self) -> ProvidersResourceWithStreamingResponse: return ProvidersResourceWithStreamingResponse(self._inference.providers) + @cached_property + def prompts(self) -> PromptsResourceWithStreamingResponse: + return PromptsResourceWithStreamingResponse(self._inference.prompts) + @cached_property def gateway(self) -> GatewayResourceWithStreamingResponse: return GatewayResourceWithStreamingResponse(self._inference.gateway) @@ -272,6 +300,10 @@ def deployments(self) -> AsyncDeploymentsResourceWithStreamingResponse: def providers(self) -> AsyncProvidersResourceWithStreamingResponse: return AsyncProvidersResourceWithStreamingResponse(self._inference.providers) + @cached_property + def prompts(self) -> AsyncPromptsResourceWithStreamingResponse: + return AsyncPromptsResourceWithStreamingResponse(self._inference.prompts) + @cached_property def gateway(self) -> AsyncGatewayResourceWithStreamingResponse: return AsyncGatewayResourceWithStreamingResponse(self._inference.gateway) diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py new file mode 100644 index 0000000000..d27e1de6dc --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/inference/prompts.py @@ -0,0 +1,743 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable + +import httpx + +from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given +from ..._utils import path_template, maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...pagination import SyncDefaultPagination, AsyncDefaultPagination +from ..._base_client import AsyncPaginator, make_request_options +from ...types.inference import PromptSort, prompt_list_params, prompt_create_params, prompt_update_params +from ...types.inference.prompt import Prompt +from ...types.inference.prompt_sort import PromptSort +from ...types.inference.prompt_filter_param import PromptFilterParam +from ...types.inference.prompt_message_param import PromptMessageParam +from ...types.shared_params.inference_params import InferenceParams +from ...types.inference.chat_completion_tool_param import ChatCompletionToolParam +from ..._exceptions import ConflictError + +__all__ = ["PromptsResource", "AsyncPromptsResource"] + + +class PromptsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> PromptsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#accessing-raw-response-data-e-g-headers + """ + return PromptsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PromptsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#with_streaming_response + """ + return PromptsResourceWithStreamingResponse(self) + + def create( + self, + *, + workspace: str | None = None, + name: str, + description: str | Omit = omit, + inference_params: InferenceParams | Omit = omit, + input_variables: SequenceNotStr[str] | Omit = omit, + messages: Iterable[PromptMessageParam] | Omit = omit, + project: str | Omit = omit, + response_format: Dict[str, object] | Omit = omit, + tags: SequenceNotStr[str] | Omit = omit, + tool_choice: Union[str, Dict[str, object]] | Omit = omit, + tools: Iterable[ChatCompletionToolParam] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + exist_ok: bool = False, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Prompt: + """Create a new prompt. + + Args: + name: Name of the prompt. + + Allowed characters: letters (a-z, A-Z), digits (0-9), + underscores, hyphens, and dots. + + inference_params: Parameters for model inference. Extra fields can be supplied for additional + options applied to the inference request directly. Fields not supported by the + model may cause inference errors during evaluation. + + project: The URN of the project associated with this prompt. + + + exist_ok: Do not raise an error if the resource already exists. Returns the existing resource. + + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + try: + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + return self._post( + path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace), + body=maybe_transform( + { + "name": name, + "description": description, + "inference_params": inference_params, + "input_variables": input_variables, + "messages": messages, + "project": project, + "response_format": response_format, + "tags": tags, + "tool_choice": tool_choice, + "tools": tools, + }, + prompt_create_params.PromptCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Prompt, + ) + except ConflictError: + if not exist_ok: + raise + return self.retrieve(name = name, workspace = workspace) + + def retrieve( + self, + name: str, + *, + workspace: str | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Prompt: + """ + Get a prompt by workspace and name. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + if not name: + raise ValueError(f"Expected a non-empty value for `name` but received {name!r}") + return self._get( + path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Prompt, + ) + + def update( + self, + name: str, + *, + workspace: str | None = None, + description: str | Omit = omit, + inference_params: InferenceParams | Omit = omit, + input_variables: SequenceNotStr[str] | Omit = omit, + messages: Iterable[PromptMessageParam] | Omit = omit, + project: str | Omit = omit, + response_format: Dict[str, object] | Omit = omit, + tags: SequenceNotStr[str] | Omit = omit, + tool_choice: Union[str, Dict[str, object]] | Omit = omit, + tools: Iterable[ChatCompletionToolParam] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Prompt: + """ + Update an existing prompt (full replacement of mutable fields). + + Args: + inference_params: Parameters for model inference. Extra fields can be supplied for additional + options applied to the inference request directly. Fields not supported by the + model may cause inference errors during evaluation. + + project: The URN of the project associated with this prompt. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + if not name: + raise ValueError(f"Expected a non-empty value for `name` but received {name!r}") + return self._put( + path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name), + body=maybe_transform( + { + "description": description, + "inference_params": inference_params, + "input_variables": input_variables, + "messages": messages, + "project": project, + "response_format": response_format, + "tags": tags, + "tool_choice": tool_choice, + "tools": tools, + }, + prompt_update_params.PromptUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Prompt, + ) + + def list( + self, + *, + workspace: str | None = None, + filter: PromptFilterParam | Omit = omit, + page: int | Omit = omit, + page_size: int | Omit = omit, + sort: PromptSort | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> SyncDefaultPagination[Prompt]: + """ + List prompts for a specific workspace. + + Args: + filter: Filter prompts by workspace, project, name, description, created_at, and + updated_at. + + page: Page number. + + page_size: Page size. + + sort: The field to sort by. To sort in decreasing order, use `-` in front of the field + name. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + return self._get_api_list( + path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace), + page=SyncDefaultPagination[Prompt], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "filter": filter, + "page": page, + "page_size": page_size, + "sort": sort, + }, + prompt_list_params.PromptListParams, + ), + ), + model=Prompt, + ) + + def delete( + self, + name: str, + *, + workspace: str | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Delete a prompt by workspace and name. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + if not name: + raise ValueError(f"Expected a non-empty value for `name` but received {name!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncPromptsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncPromptsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#accessing-raw-response-data-e-g-headers + """ + return AsyncPromptsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPromptsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#with_streaming_response + """ + return AsyncPromptsResourceWithStreamingResponse(self) + + async def create( + self, + *, + workspace: str | None = None, + name: str, + description: str | Omit = omit, + inference_params: InferenceParams | Omit = omit, + input_variables: SequenceNotStr[str] | Omit = omit, + messages: Iterable[PromptMessageParam] | Omit = omit, + project: str | Omit = omit, + response_format: Dict[str, object] | Omit = omit, + tags: SequenceNotStr[str] | Omit = omit, + tool_choice: Union[str, Dict[str, object]] | Omit = omit, + tools: Iterable[ChatCompletionToolParam] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + exist_ok: bool = False, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Prompt: + """Create a new prompt. + + Args: + name: Name of the prompt. + + Allowed characters: letters (a-z, A-Z), digits (0-9), + underscores, hyphens, and dots. + + inference_params: Parameters for model inference. Extra fields can be supplied for additional + options applied to the inference request directly. Fields not supported by the + model may cause inference errors during evaluation. + + project: The URN of the project associated with this prompt. + + + exist_ok: Do not raise an error if the resource already exists. Returns the existing resource. + + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + try: + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + return await self._post( + path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace), + body=await async_maybe_transform( + { + "name": name, + "description": description, + "inference_params": inference_params, + "input_variables": input_variables, + "messages": messages, + "project": project, + "response_format": response_format, + "tags": tags, + "tool_choice": tool_choice, + "tools": tools, + }, + prompt_create_params.PromptCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Prompt, + ) + except ConflictError: + if not exist_ok: + raise + return await self.retrieve(name = name, workspace = workspace) + + async def retrieve( + self, + name: str, + *, + workspace: str | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Prompt: + """ + Get a prompt by workspace and name. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + if not name: + raise ValueError(f"Expected a non-empty value for `name` but received {name!r}") + return await self._get( + path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Prompt, + ) + + async def update( + self, + name: str, + *, + workspace: str | None = None, + description: str | Omit = omit, + inference_params: InferenceParams | Omit = omit, + input_variables: SequenceNotStr[str] | Omit = omit, + messages: Iterable[PromptMessageParam] | Omit = omit, + project: str | Omit = omit, + response_format: Dict[str, object] | Omit = omit, + tags: SequenceNotStr[str] | Omit = omit, + tool_choice: Union[str, Dict[str, object]] | Omit = omit, + tools: Iterable[ChatCompletionToolParam] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> Prompt: + """ + Update an existing prompt (full replacement of mutable fields). + + Args: + inference_params: Parameters for model inference. Extra fields can be supplied for additional + options applied to the inference request directly. Fields not supported by the + model may cause inference errors during evaluation. + + project: The URN of the project associated with this prompt. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + if not name: + raise ValueError(f"Expected a non-empty value for `name` but received {name!r}") + return await self._put( + path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name), + body=await async_maybe_transform( + { + "description": description, + "inference_params": inference_params, + "input_variables": input_variables, + "messages": messages, + "project": project, + "response_format": response_format, + "tags": tags, + "tool_choice": tool_choice, + "tools": tools, + }, + prompt_update_params.PromptUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Prompt, + ) + + def list( + self, + *, + workspace: str | None = None, + filter: PromptFilterParam | Omit = omit, + page: int | Omit = omit, + page_size: int | Omit = omit, + sort: PromptSort | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> AsyncPaginator[Prompt, AsyncDefaultPagination[Prompt]]: + """ + List prompts for a specific workspace. + + Args: + filter: Filter prompts by workspace, project, name, description, created_at, and + updated_at. + + page: Page number. + + page_size: Page size. + + sort: The field to sort by. To sort in decreasing order, use `-` in front of the field + name. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + return self._get_api_list( + path_template("/apis/models/v2/workspaces/{workspace}/prompts", workspace=workspace), + page=AsyncDefaultPagination[Prompt], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "filter": filter, + "page": page, + "page_size": page_size, + "sort": sort, + }, + prompt_list_params.PromptListParams, + ), + ), + model=Prompt, + ) + + async def delete( + self, + name: str, + *, + workspace: str | None = None, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> None: + """ + Delete a prompt by workspace and name. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if workspace is None: + workspace = self._client._get_workspace_path_param() + if not workspace: + raise ValueError(f"Expected a non-empty value for `workspace` but received {workspace!r}") + if not name: + raise ValueError(f"Expected a non-empty value for `name` but received {name!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + path_template("/apis/models/v2/workspaces/{workspace}/prompts/{name}", workspace=workspace, name=name), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class PromptsResourceWithRawResponse: + def __init__(self, prompts: PromptsResource) -> None: + self._prompts = prompts + + self.create = to_raw_response_wrapper( + prompts.create, + ) + self.retrieve = to_raw_response_wrapper( + prompts.retrieve, + ) + self.update = to_raw_response_wrapper( + prompts.update, + ) + self.list = to_raw_response_wrapper( + prompts.list, + ) + self.delete = to_raw_response_wrapper( + prompts.delete, + ) + + +class AsyncPromptsResourceWithRawResponse: + def __init__(self, prompts: AsyncPromptsResource) -> None: + self._prompts = prompts + + self.create = async_to_raw_response_wrapper( + prompts.create, + ) + self.retrieve = async_to_raw_response_wrapper( + prompts.retrieve, + ) + self.update = async_to_raw_response_wrapper( + prompts.update, + ) + self.list = async_to_raw_response_wrapper( + prompts.list, + ) + self.delete = async_to_raw_response_wrapper( + prompts.delete, + ) + + +class PromptsResourceWithStreamingResponse: + def __init__(self, prompts: PromptsResource) -> None: + self._prompts = prompts + + self.create = to_streamed_response_wrapper( + prompts.create, + ) + self.retrieve = to_streamed_response_wrapper( + prompts.retrieve, + ) + self.update = to_streamed_response_wrapper( + prompts.update, + ) + self.list = to_streamed_response_wrapper( + prompts.list, + ) + self.delete = to_streamed_response_wrapper( + prompts.delete, + ) + + +class AsyncPromptsResourceWithStreamingResponse: + def __init__(self, prompts: AsyncPromptsResource) -> None: + self._prompts = prompts + + self.create = async_to_streamed_response_wrapper( + prompts.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + prompts.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + prompts.update, + ) + self.list = async_to_streamed_response_wrapper( + prompts.list, + ) + self.delete = async_to_streamed_response_wrapper( + prompts.delete, + ) diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md index 99f688895d..f5bfd41e0c 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/api.md @@ -8,8 +8,10 @@ from nemo_platform.types.jobs import ( ComputeResources, ContainerSpec, CPUExecutionProvider, + CPUExecutionProviderParam, CreatePlatformJobRequest, DistributedGPUExecutionProvider, + DistributedGPUExecutionProviderParam, DockerJobExecutionProfile, DockerJobExecutionProfileConfig, DockerJobNetworkConfig, @@ -17,6 +19,7 @@ from nemo_platform.types.jobs import ( DockerVolumeMount, E2EJobExecutionProfile, GPUExecutionProvider, + GPUExecutionProviderParam, ImagePullSecret, JobExecutionProfileConfig, KubernetesEmptyDirVolume, @@ -33,7 +36,9 @@ from nemo_platform.types.jobs import ( PlatformJobSecretEnvironmentVariableRef, PlatformJobSortField, PlatformJobSpec, + PlatformJobSpecParam, PlatformJobStepSpec, + PlatformJobStepSpecParam, PlatformJobsListFilter, StepLifecycle, SubprocessExecutionProvider, diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py index 3a7aff2408..9c5652b216 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/jobs/jobs.py @@ -58,6 +58,7 @@ from ...pagination import SyncLogsPagination, AsyncLogsPagination, SyncDefaultPagination, AsyncDefaultPagination from ...types.jobs import ( PlatformJobSortField, + PlatformJobSpecParam, job_list_params, job_create_params, job_get_logs_params, diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py index 2d670dadaf..fafcd134f4 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py @@ -32,7 +32,6 @@ PlatformJobLog as PlatformJobLog, ToolCallConfig as ToolCallConfig, APIEndpointData as APIEndpointData, - FilesetMetadata as FilesetMetadata, FileStorageType as FileStorageType, InferenceParams as InferenceParams, LinearLayerSpec as LinearLayerSpec, diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py index 3833c1d785..b76dd4a694 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py @@ -22,6 +22,7 @@ from .cache_status import CacheStatus as CacheStatus from .fileset_file import FilesetFile as FilesetFile from .fileset_purpose import FilesetPurpose as FilesetPurpose +from .fileset_metadata import FilesetMetadata as FilesetMetadata from .s3_storage_config import S3StorageConfig as S3StorageConfig from .ngc_storage_config import NGCStorageConfig as NGCStorageConfig from .fileset_list_params import FilesetListParams as FilesetListParams @@ -32,6 +33,7 @@ from .fileset_create_params import FilesetCreateParams as FilesetCreateParams from .fileset_update_params import FilesetUpdateParams as FilesetUpdateParams from .file_list_files_params import FileListFilesParams as FileListFilesParams +from .fileset_metadata_param import FilesetMetadataParam as FilesetMetadataParam from .file_upload_file_params import FileUploadFileParams as FileUploadFileParams from .s3_storage_config_param import S3StorageConfigParam as S3StorageConfigParam from .ngc_storage_config_param import NGCStorageConfigParam as NGCStorageConfigParam diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py index 810d5ce990..e6d9642b7a 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py @@ -20,10 +20,10 @@ from ..._models import BaseModel from .fileset_purpose import FilesetPurpose +from .fileset_metadata import FilesetMetadata from .s3_storage_config import S3StorageConfig from .ngc_storage_config import NGCStorageConfig from .local_storage_config import LocalStorageConfig -from ..shared.fileset_metadata import FilesetMetadata from .huggingface_storage_config import HuggingfaceStorageConfig __all__ = ["Fileset", "Storage"] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py index 06715b1c74..ccab3462e8 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py @@ -21,10 +21,10 @@ from typing_extensions import Required, TypeAlias, TypedDict from .fileset_purpose import FilesetPurpose +from .fileset_metadata_param import FilesetMetadataParam from .s3_storage_config_param import S3StorageConfigParam from .ngc_storage_config_param import NGCStorageConfigParam from .local_storage_config_param import LocalStorageConfigParam -from ..shared_params.fileset_metadata import FilesetMetadata from .huggingface_storage_config_param import HuggingfaceStorageConfigParam __all__ = ["FilesetCreateParams", "Storage"] @@ -49,7 +49,7 @@ class FilesetCreateParams(TypedDict, total=False): description: str """The description of the fileset.""" - metadata: FilesetMetadata + metadata: FilesetMetadataParam """Tagged metadata container - the key indicates the type. Example: metadata = FilesetMetadata( dataset=DatasetMetadataContent( diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py similarity index 91% rename from sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py rename to sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py index b35b6d8ecc..36573bd374 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py @@ -18,8 +18,8 @@ from typing import Optional from ..._models import BaseModel -from .model_metadata_content import ModelMetadataContent -from .dataset_metadata_content import DatasetMetadataContent +from ..shared.model_metadata_content import ModelMetadataContent +from ..shared.dataset_metadata_content import DatasetMetadataContent __all__ = ["FilesetMetadata"] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py similarity index 85% rename from sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata.py rename to sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py index d53a643b0d..66f37de921 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py @@ -19,13 +19,13 @@ from typing_extensions import TypedDict -from .model_metadata_content import ModelMetadataContent -from .dataset_metadata_content import DatasetMetadataContent +from ..shared_params.model_metadata_content import ModelMetadataContent +from ..shared_params.dataset_metadata_content import DatasetMetadataContent -__all__ = ["FilesetMetadata"] +__all__ = ["FilesetMetadataParam"] -class FilesetMetadata(TypedDict, total=False): +class FilesetMetadataParam(TypedDict, total=False): """Tagged metadata container - the key indicates the type. Example: diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py index 3f8699dda8..0b389fd318 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_update_params.py @@ -21,7 +21,7 @@ from typing_extensions import TypedDict from .fileset_purpose import FilesetPurpose -from ..shared_params.fileset_metadata import FilesetMetadata +from .fileset_metadata_param import FilesetMetadataParam __all__ = ["FilesetUpdateParams"] @@ -35,7 +35,7 @@ class FilesetUpdateParams(TypedDict, total=False): description: str """The description of the fileset.""" - metadata: FilesetMetadata + metadata: FilesetMetadataParam """Tagged metadata container - the key indicates the type. Example: metadata = FilesetMetadata( dataset=DatasetMetadataContent( diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py index a8b865241a..20356db654 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/__init__.py @@ -18,14 +18,26 @@ from __future__ import annotations from .engine import Engine as Engine +from .prompt import Prompt as Prompt from .model_type import ModelType as ModelType +from .prompt_sort import PromptSort as PromptSort +from .prompts_page import PromptsPage as PromptsPage from .virtual_model import VirtualModel as VirtualModel from .model_provider import ModelProvider as ModelProvider +from .prompt_message import PromptMessage as PromptMessage from .middleware_call import MiddlewareCall as MiddlewareCall from .model_deployment import ModelDeployment as ModelDeployment +from .prompt_list_params import PromptListParams as PromptListParams +from .function_definition import FunctionDefinition as FunctionDefinition from .model_provider_sort import ModelProviderSort as ModelProviderSort +from .prompt_filter_param import PromptFilterParam as PromptFilterParam +from .prompt_message_role import PromptMessageRole as PromptMessageRole from .virtual_models_page import VirtualModelsPage as VirtualModelsPage +from .chat_completion_tool import ChatCompletionTool as ChatCompletionTool from .model_providers_page import ModelProvidersPage as ModelProvidersPage +from .prompt_create_params import PromptCreateParams as PromptCreateParams +from .prompt_message_param import PromptMessageParam as PromptMessageParam +from .prompt_update_params import PromptUpdateParams as PromptUpdateParams from .provider_list_params import ProviderListParams as ProviderListParams from .served_model_mapping import ServedModelMapping as ServedModelMapping from .middleware_call_param import MiddlewareCallParam as MiddlewareCallParam @@ -40,7 +52,9 @@ from .deployment_create_params import DeploymentCreateParams as DeploymentCreateParams from .deployment_update_params import DeploymentUpdateParams as DeploymentUpdateParams from .container_executor_config import ContainerExecutorConfig as ContainerExecutorConfig +from .function_definition_param import FunctionDefinitionParam as FunctionDefinitionParam from .virtual_model_list_params import VirtualModelListParams as VirtualModelListParams +from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam from .served_model_mapping_param import ServedModelMappingParam as ServedModelMappingParam from .virtual_model_patch_params import VirtualModelPatchParams as VirtualModelPatchParams from .model_provider_filter_param import ModelProviderFilterParam as ModelProviderFilterParam diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py new file mode 100644 index 0000000000..c0d66c32a2 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .function_definition import FunctionDefinition + +__all__ = ["ChatCompletionTool"] + + +class ChatCompletionTool(BaseModel): + """An OpenAI-compatible tool definition (currently always a function tool).""" + + function: FunctionDefinition + """An OpenAI-compatible function definition for tool calling. + + Mirrors the `function` object the Inference Gateway forwards to + OpenAI-compatible backends. + """ + + type: Literal["function"] + """The type of the tool. Currently only 'function' is supported.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py new file mode 100644 index 0000000000..3275a2236e --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/chat_completion_tool_param.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .function_definition_param import FunctionDefinitionParam + +__all__ = ["ChatCompletionToolParam"] + + +class ChatCompletionToolParam(TypedDict, total=False): + """An OpenAI-compatible tool definition (currently always a function tool).""" + + function: Required[FunctionDefinitionParam] + """An OpenAI-compatible function definition for tool calling. + + Mirrors the `function` object the Inference Gateway forwards to + OpenAI-compatible backends. + """ + + type: Required[Literal["function"]] + """The type of the tool. Currently only 'function' is supported.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py new file mode 100644 index 0000000000..1fdd2ceb03 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition.py @@ -0,0 +1,45 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional + +from ..._models import BaseModel + +__all__ = ["FunctionDefinition"] + + +class FunctionDefinition(BaseModel): + """An OpenAI-compatible function definition for tool calling. + + Mirrors the ``function`` object the Inference Gateway forwards to + OpenAI-compatible backends. + """ + + name: str + """The name of the function to be called.""" + + description: Optional[str] = None + """ + A description of what the function does, used by the model to decide when and + how to call it. + """ + + parameters: Optional[Dict[str, object]] = None + """The parameters the function accepts, described as a JSON Schema object.""" + + strict: Optional[bool] = None + """Whether to enforce strict schema adherence when generating the function call.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py new file mode 100644 index 0000000000..d42fd2a983 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/function_definition_param.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict +from typing_extensions import Required, TypedDict + +__all__ = ["FunctionDefinitionParam"] + + +class FunctionDefinitionParam(TypedDict, total=False): + """An OpenAI-compatible function definition for tool calling. + + Mirrors the ``function`` object the Inference Gateway forwards to + OpenAI-compatible backends. + """ + + name: Required[str] + """The name of the function to be called.""" + + description: str + """ + A description of what the function does, used by the model to decide when and + how to call it. + """ + + parameters: Dict[str, object] + """The parameters the function accepts, described as a JSON Schema object.""" + + strict: bool + """Whether to enforce strict schema adherence when generating the function call.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py new file mode 100644 index 0000000000..c16b94d2ec --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt.py @@ -0,0 +1,96 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from datetime import datetime + +from ..._models import BaseModel +from .prompt_message import PromptMessage +from .chat_completion_tool import ChatCompletionTool +from ..shared.inference_params import InferenceParams + +__all__ = ["Prompt"] + + +class Prompt(BaseModel): + """A reusable, stored chat prompt. + + A Prompt captures the messages, declared template variables, optional tool + definitions, and default inference parameters needed to invoke a model + through the Inference Gateway. The unique identifier is workspace/name. + """ + + created_at: datetime + """The timestamp of model entity creation""" + + name: str + """Name of the entity. + + Name/workspace combo must be unique across all entities. Allowed characters: + letters (a-z, A-Z), digits (0-9), underscores, hyphens, and dots. + """ + + updated_at: datetime + """The timestamp of the last model entity update""" + + workspace: str + """The workspace of the entity. + + Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, hyphens, and + dots. + """ + + id: Optional[str] = None + """Unique identifier for the prompt.""" + + description: Optional[str] = None + """Optional description of the prompt.""" + + inference_params: Optional[InferenceParams] = None + """Parameters for model inference. + + Extra fields can be supplied for additional options applied to the inference + request directly. Fields not supported by the model may cause inference errors + during evaluation. + """ + + input_variables: Optional[List[str]] = None + """Names of the Jinja2 template variables the prompt expects.""" + + messages: Optional[List[PromptMessage]] = None + """Ordered list of chat messages that make up the prompt.""" + + project: Optional[str] = None + """The URN of the project associated with this entity.""" + + response_format: Optional[Dict[str, object]] = None + """Optional OpenAI-compatible response_format, e.g. + + a json_schema structured-output spec. + """ + + tags: Optional[List[str]] = None + """Optional free-form tags for organizing prompts.""" + + tool_choice: Union[str, Dict[str, object], None] = None + """ + Controls which (if any) tool is called: 'none', 'auto', 'required', or a + named-tool object. + """ + + tools: Optional[List[ChatCompletionTool]] = None + """Optional OpenAI-compatible tool definitions to send with the prompt.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py new file mode 100644 index 0000000000..66960a3ca9 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_create_params.py @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Required, TypedDict + +from ..._types import SequenceNotStr +from .prompt_message_param import PromptMessageParam +from .chat_completion_tool_param import ChatCompletionToolParam +from ..shared_params.inference_params import InferenceParams + +__all__ = ["PromptCreateParams"] + + +class PromptCreateParams(TypedDict, total=False): + workspace: str + + name: Required[str] + """Name of the prompt. + + Allowed characters: letters (a-z, A-Z), digits (0-9), underscores, hyphens, and + dots. + """ + + description: str + + inference_params: InferenceParams + """Parameters for model inference. + + Extra fields can be supplied for additional options applied to the inference + request directly. Fields not supported by the model may cause inference errors + during evaluation. + """ + + input_variables: SequenceNotStr[str] + + messages: Iterable[PromptMessageParam] + + project: str + """The URN of the project associated with this prompt.""" + + response_format: Dict[str, object] + + tags: SequenceNotStr[str] + + tool_choice: Union[str, Dict[str, object]] + + tools: Iterable[ChatCompletionToolParam] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py new file mode 100644 index 0000000000..55bcc70223 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_filter_param.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from ..shared_params.datetime_filter import DatetimeFilter + +__all__ = ["PromptFilterParam"] + + +class PromptFilterParam(TypedDict, total=False): + """Filter for Prompt queries.""" + + created_at: DatetimeFilter + """Filter by creation date.""" + + description: str + """Filter by description.""" + + name: str + """Filter by name.""" + + project: str + """Filter by project URN.""" + + updated_at: DatetimeFilter + """Filter by update date.""" + + workspace: str + """Filter by workspace.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py new file mode 100644 index 0000000000..a9e8d1629d --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_list_params.py @@ -0,0 +1,47 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .prompt_sort import PromptSort +from .prompt_filter_param import PromptFilterParam + +__all__ = ["PromptListParams"] + + +class PromptListParams(TypedDict, total=False): + workspace: str + + filter: PromptFilterParam + """ + Filter prompts by workspace, project, name, description, created_at, and + updated_at. + """ + + page: int + """Page number.""" + + page_size: int + """Page size.""" + + sort: PromptSort + """The field to sort by. + + To sort in decreasing order, use `-` in front of the field name. + """ diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py new file mode 100644 index 0000000000..255845e5c1 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel +from .prompt_message_role import PromptMessageRole + +__all__ = ["PromptMessage"] + + +class PromptMessage(BaseModel): + """A single templated message in a chat prompt. + + ``content`` is a Jinja2 template body that may reference the prompt's + declared ``input_variables`` (e.g. ``{{ topic }}``). + """ + + content: str + """Templated message content. May contain template variables.""" + + role: PromptMessageRole + """Role of a message author in a chat prompt. + + Follows the OpenAI chat schema the Inference Gateway speaks + (`/v1/chat/completions`). + """ diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py new file mode 100644 index 0000000000..a861ed6e10 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_param.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .prompt_message_role import PromptMessageRole + +__all__ = ["PromptMessageParam"] + + +class PromptMessageParam(TypedDict, total=False): + """A single templated message in a chat prompt. + + ``content`` is a Jinja2 template body that may reference the prompt's + declared ``input_variables`` (e.g. ``{{ topic }}``). + """ + + content: Required[str] + """Templated message content. May contain template variables.""" + + role: Required[PromptMessageRole] + """Role of a message author in a chat prompt. + + Follows the OpenAI chat schema the Inference Gateway speaks + (`/v1/chat/completions`). + """ diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py new file mode 100644 index 0000000000..9ce7d3e39b --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_message_role.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["PromptMessageRole"] + +PromptMessageRole: TypeAlias = Literal["system", "developer", "user", "assistant"] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py new file mode 100644 index 0000000000..f158f56b59 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_sort.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["PromptSort"] + +PromptSort: TypeAlias = Literal["name", "-name", "created_at", "-created_at", "updated_at", "-updated_at"] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py new file mode 100644 index 0000000000..c93613b7cc --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompt_update_params.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import TypedDict + +from ..._types import SequenceNotStr +from .prompt_message_param import PromptMessageParam +from .chat_completion_tool_param import ChatCompletionToolParam +from ..shared_params.inference_params import InferenceParams + +__all__ = ["PromptUpdateParams"] + + +class PromptUpdateParams(TypedDict, total=False): + workspace: str + + description: str + + inference_params: InferenceParams + """Parameters for model inference. + + Extra fields can be supplied for additional options applied to the inference + request directly. Fields not supported by the model may cause inference errors + during evaluation. + """ + + input_variables: SequenceNotStr[str] + + messages: Iterable[PromptMessageParam] + + project: str + """The URN of the project associated with this prompt.""" + + response_format: Dict[str, object] + + tags: SequenceNotStr[str] + + tool_choice: Union[str, Dict[str, object]] + + tools: Iterable[ChatCompletionToolParam] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py new file mode 100644 index 0000000000..e317a98336 --- /dev/null +++ b/sdk/python/nemo-platform/src/nemo_platform/types/inference/prompts_page.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Optional + +from .prompt import Prompt +from ..._models import BaseModel +from ..shared.pagination_data import PaginationData + +__all__ = ["PromptsPage"] + + +class PromptsPage(BaseModel): + data: List[Prompt] + + filter: Optional[Dict[str, object]] = None + """Filtering information.""" + + pagination: Optional[PaginationData] = None + """Pagination information.""" + + sort: Optional[str] = None + """The field on which the results are sorted.""" diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py index e0178b4f49..7a667ef8b5 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py @@ -26,7 +26,6 @@ from .delete_response import DeleteResponse as DeleteResponse from .finetuning_type import FinetuningType as FinetuningType from .pagination_data import PaginationData as PaginationData -from .fileset_metadata import FilesetMetadata as FilesetMetadata from .inference_params import InferenceParams as InferenceParams from .platform_job_log import PlatformJobLog as PlatformJobLog from .tool_call_config import ToolCallConfig as ToolCallConfig diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py index 449d6c5e14..f78dae8e90 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/__init__.py @@ -23,7 +23,6 @@ from .backend_format import BackendFormat as BackendFormat from .datetime_filter import DatetimeFilter as DatetimeFilter from .finetuning_type import FinetuningType as FinetuningType -from .fileset_metadata import FilesetMetadata as FilesetMetadata from .inference_params import InferenceParams as InferenceParams from .tool_call_config import ToolCallConfig as ToolCallConfig from .api_endpoint_data import APIEndpointData as APIEndpointData diff --git a/sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py b/sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py new file mode 100644 index 0000000000..ed9863015f --- /dev/null +++ b/sdk/python/nemo-platform/tests/api_resources/inference/test_prompts.py @@ -0,0 +1,741 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from nemo_platform import NeMoPlatform, AsyncNeMoPlatform +from nemo_platform._utils import parse_datetime +from nemo_platform.pagination import SyncDefaultPagination, AsyncDefaultPagination +from nemo_platform.types.inference import ( + Prompt, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestPrompts: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_create(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.create( + workspace="workspace", + name="support-bot-system", + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_create_with_all_params(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.create( + workspace="workspace", + name="support-bot-system", + description="description", + inference_params={ + "max_completion_tokens": 1, + "max_tokens": 1, + "model": "model", + "stop": ["string"], + "temperature": 0, + "top_p": 0, + }, + input_variables=["string"], + messages=[ + { + "content": "content", + "role": "system", + } + ], + project="project", + response_format={"foo": "bar"}, + tags=["string"], + tool_choice="string", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {"foo": "bar"}, + "strict": True, + }, + "type": "function", + } + ], + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_raw_response_create(self, client: NeMoPlatform) -> None: + response = client.inference.prompts.with_raw_response.create( + workspace="workspace", + name="support-bot-system", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_streaming_response_create(self, client: NeMoPlatform) -> None: + with client.inference.prompts.with_streaming_response.create( + workspace="workspace", + name="support-bot-system", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_path_params_create(self, client: NeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + client.inference.prompts.with_raw_response.create( + workspace="", + name="support-bot-system", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_retrieve(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.retrieve( + name="name", + workspace="workspace", + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_raw_response_retrieve(self, client: NeMoPlatform) -> None: + response = client.inference.prompts.with_raw_response.retrieve( + name="name", + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_streaming_response_retrieve(self, client: NeMoPlatform) -> None: + with client.inference.prompts.with_streaming_response.retrieve( + name="name", + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_path_params_retrieve(self, client: NeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + client.inference.prompts.with_raw_response.retrieve( + name="name", + workspace="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"): + client.inference.prompts.with_raw_response.retrieve( + name="", + workspace="workspace", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_update(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.update( + name="name", + workspace="workspace", + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_update_with_all_params(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.update( + name="name", + workspace="workspace", + description="description", + inference_params={ + "max_completion_tokens": 1, + "max_tokens": 1, + "model": "model", + "stop": ["string"], + "temperature": 0, + "top_p": 0, + }, + input_variables=["string"], + messages=[ + { + "content": "content", + "role": "system", + } + ], + project="project", + response_format={"foo": "bar"}, + tags=["string"], + tool_choice="string", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {"foo": "bar"}, + "strict": True, + }, + "type": "function", + } + ], + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_raw_response_update(self, client: NeMoPlatform) -> None: + response = client.inference.prompts.with_raw_response.update( + name="name", + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_streaming_response_update(self, client: NeMoPlatform) -> None: + with client.inference.prompts.with_streaming_response.update( + name="name", + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_path_params_update(self, client: NeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + client.inference.prompts.with_raw_response.update( + name="name", + workspace="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"): + client.inference.prompts.with_raw_response.update( + name="", + workspace="workspace", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_list(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.list( + workspace="workspace", + ) + assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_list_with_all_params(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.list( + workspace="workspace", + filter={ + "created_at": { + "gte": parse_datetime("2019-12-27T18:11:19.117Z"), + "lte": parse_datetime("2019-12-27T18:11:19.117Z"), + }, + "description": "description", + "name": "name", + "project": "project", + "updated_at": { + "gte": parse_datetime("2019-12-27T18:11:19.117Z"), + "lte": parse_datetime("2019-12-27T18:11:19.117Z"), + }, + "workspace": "workspace", + }, + page=1, + page_size=1, + sort="name", + ) + assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_raw_response_list(self, client: NeMoPlatform) -> None: + response = client.inference.prompts.with_raw_response.list( + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = response.parse() + assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_streaming_response_list(self, client: NeMoPlatform) -> None: + with client.inference.prompts.with_streaming_response.list( + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = response.parse() + assert_matches_type(SyncDefaultPagination[Prompt], prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_path_params_list(self, client: NeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + client.inference.prompts.with_raw_response.list( + workspace="", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_method_delete(self, client: NeMoPlatform) -> None: + prompt = client.inference.prompts.delete( + name="name", + workspace="workspace", + ) + assert prompt is None + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_raw_response_delete(self, client: NeMoPlatform) -> None: + response = client.inference.prompts.with_raw_response.delete( + name="name", + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = response.parse() + assert prompt is None + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_streaming_response_delete(self, client: NeMoPlatform) -> None: + with client.inference.prompts.with_streaming_response.delete( + name="name", + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = response.parse() + assert prompt is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + def test_path_params_delete(self, client: NeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + client.inference.prompts.with_raw_response.delete( + name="name", + workspace="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"): + client.inference.prompts.with_raw_response.delete( + name="", + workspace="workspace", + ) + + +class TestAsyncPrompts: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_create(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.create( + workspace="workspace", + name="support-bot-system", + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.create( + workspace="workspace", + name="support-bot-system", + description="description", + inference_params={ + "max_completion_tokens": 1, + "max_tokens": 1, + "model": "model", + "stop": ["string"], + "temperature": 0, + "top_p": 0, + }, + input_variables=["string"], + messages=[ + { + "content": "content", + "role": "system", + } + ], + project="project", + response_format={"foo": "bar"}, + tags=["string"], + tool_choice="string", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {"foo": "bar"}, + "strict": True, + }, + "type": "function", + } + ], + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_raw_response_create(self, async_client: AsyncNeMoPlatform) -> None: + response = await async_client.inference.prompts.with_raw_response.create( + workspace="workspace", + name="support-bot-system", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = await response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_streaming_response_create(self, async_client: AsyncNeMoPlatform) -> None: + async with async_client.inference.prompts.with_streaming_response.create( + workspace="workspace", + name="support-bot-system", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = await response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_path_params_create(self, async_client: AsyncNeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + await async_client.inference.prompts.with_raw_response.create( + workspace="", + name="support-bot-system", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_retrieve(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.retrieve( + name="name", + workspace="workspace", + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncNeMoPlatform) -> None: + response = await async_client.inference.prompts.with_raw_response.retrieve( + name="name", + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = await response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncNeMoPlatform) -> None: + async with async_client.inference.prompts.with_streaming_response.retrieve( + name="name", + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = await response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncNeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + await async_client.inference.prompts.with_raw_response.retrieve( + name="name", + workspace="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"): + await async_client.inference.prompts.with_raw_response.retrieve( + name="", + workspace="workspace", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_update(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.update( + name="name", + workspace="workspace", + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.update( + name="name", + workspace="workspace", + description="description", + inference_params={ + "max_completion_tokens": 1, + "max_tokens": 1, + "model": "model", + "stop": ["string"], + "temperature": 0, + "top_p": 0, + }, + input_variables=["string"], + messages=[ + { + "content": "content", + "role": "system", + } + ], + project="project", + response_format={"foo": "bar"}, + tags=["string"], + tool_choice="string", + tools=[ + { + "function": { + "name": "name", + "description": "description", + "parameters": {"foo": "bar"}, + "strict": True, + }, + "type": "function", + } + ], + ) + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_raw_response_update(self, async_client: AsyncNeMoPlatform) -> None: + response = await async_client.inference.prompts.with_raw_response.update( + name="name", + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = await response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_streaming_response_update(self, async_client: AsyncNeMoPlatform) -> None: + async with async_client.inference.prompts.with_streaming_response.update( + name="name", + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = await response.parse() + assert_matches_type(Prompt, prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_path_params_update(self, async_client: AsyncNeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + await async_client.inference.prompts.with_raw_response.update( + name="name", + workspace="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"): + await async_client.inference.prompts.with_raw_response.update( + name="", + workspace="workspace", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_list(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.list( + workspace="workspace", + ) + assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.list( + workspace="workspace", + filter={ + "created_at": { + "gte": parse_datetime("2019-12-27T18:11:19.117Z"), + "lte": parse_datetime("2019-12-27T18:11:19.117Z"), + }, + "description": "description", + "name": "name", + "project": "project", + "updated_at": { + "gte": parse_datetime("2019-12-27T18:11:19.117Z"), + "lte": parse_datetime("2019-12-27T18:11:19.117Z"), + }, + "workspace": "workspace", + }, + page=1, + page_size=1, + sort="name", + ) + assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_raw_response_list(self, async_client: AsyncNeMoPlatform) -> None: + response = await async_client.inference.prompts.with_raw_response.list( + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = await response.parse() + assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"]) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_streaming_response_list(self, async_client: AsyncNeMoPlatform) -> None: + async with async_client.inference.prompts.with_streaming_response.list( + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = await response.parse() + assert_matches_type(AsyncDefaultPagination[Prompt], prompt, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_path_params_list(self, async_client: AsyncNeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + await async_client.inference.prompts.with_raw_response.list( + workspace="", + ) + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_method_delete(self, async_client: AsyncNeMoPlatform) -> None: + prompt = await async_client.inference.prompts.delete( + name="name", + workspace="workspace", + ) + assert prompt is None + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_raw_response_delete(self, async_client: AsyncNeMoPlatform) -> None: + response = await async_client.inference.prompts.with_raw_response.delete( + name="name", + workspace="workspace", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + prompt = await response.parse() + assert prompt is None + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncNeMoPlatform) -> None: + async with async_client.inference.prompts.with_streaming_response.delete( + name="name", + workspace="workspace", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + prompt = await response.parse() + assert prompt is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip(reason="Mock server tests are disabled") + @parametrize + async def test_path_params_delete(self, async_client: AsyncNeMoPlatform) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `workspace` but received ''"): + await async_client.inference.prompts.with_raw_response.delete( + name="name", + workspace="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `name` but received ''"): + await async_client.inference.prompts.with_raw_response.delete( + name="", + workspace="workspace", + ) diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml index 9da6993d54..e96b34b9e4 100644 --- a/sdk/stainless.yaml +++ b/sdk/stainless.yaml @@ -26,16 +26,16 @@ custom_casings: initialism: true targets: -# typescript: -# _skip_running_tests: false -# package_name: nemo-platform-v1 -# publish: -# npm: false -# skip: false -# options: -# mcp_server: -# package_name: nemo-platform-v1-mcp -# enable_all_resources: true + # typescript: + # _skip_running_tests: false + # package_name: nemo-platform-v1 + # publish: + # npm: false + # skip: false + # options: + # mcp_server: + # package_name: nemo-platform-v1-mcp + # enable_all_resources: true python: # [docs]: https://www.stainless.com/docs/reference/editions @@ -81,86 +81,86 @@ client_settings: # `pagination` defines [pagination schemes] which provides a template to match # endpoints and generate next-page and auto-pagination helpers in the SDKs. pagination: -- name: default_pagination - type: page_number - request: - page: - type: integer - x-stainless-pagination-property: - purpose: page_number_param - page_size: - type: integer - response: - data: - type: array - x-stainless-pagination-property: - purpose: items - items: + - name: default_pagination + type: page_number + request: + page: + type: integer + x-stainless-pagination-property: + purpose: page_number_param + page_size: + type: integer + response: + data: + type: array + x-stainless-pagination-property: + purpose: items + items: + type: object + additionalProperties: true + pagination: type: object - additionalProperties: true - pagination: - type: object - properties: - page: - type: integer - title: Page - description: The current page number. - x-stainless-pagination-property: - purpose: current_page_number_field - page_size: - type: integer - title: Page Size - description: The page size used for the query. - current_page_size: - type: integer - title: Current Page Size - description: The size for the current page. - total_pages: - type: integer - title: Total Pages - description: The total number of pages. - x-stainless-pagination-property: - purpose: total_page_count_field - total_results: - type: integer - title: Total Results - description: The total number of results. - required: - - page - - page_size - - total_pages - - total_results - - current_page_size -- name: logs_pagination - type: cursor - request: - limit: - type: integer - page_cursor: - type: string - x-stainless-pagination-property: - purpose: next_cursor_param - response: - data: - type: array - x-stainless-pagination-property: - purpose: items - items: - type: object - additionalProperties: true - next_page: - type: string - x-stainless-pagination-property: - purpose: next_cursor_field + properties: + page: + type: integer + title: Page + description: The current page number. + x-stainless-pagination-property: + purpose: current_page_number_field + page_size: + type: integer + title: Page Size + description: The page size used for the query. + current_page_size: + type: integer + title: Current Page Size + description: The size for the current page. + total_pages: + type: integer + title: Total Pages + description: The total number of pages. + x-stainless-pagination-property: + purpose: total_page_count_field + total_results: + type: integer + title: Total Results + description: The total number of results. + required: + - page + - page_size + - total_pages + - total_results + - current_page_size + - name: logs_pagination + type: cursor + request: + limit: + type: integer + page_cursor: + type: string + x-stainless-pagination-property: + purpose: next_cursor_param + response: + data: + type: array + x-stainless-pagination-property: + purpose: items + items: + type: object + additionalProperties: true + next_page: + type: string + x-stainless-pagination-property: + purpose: next_cursor_field streaming: on_event: - - data_starts_with: "[DONE]" - handle: done - - event_type: error - handle: error - - event_type: - handle: yield + - data_starts_with: "[DONE]" + handle: done + - event_type: error + handle: error + - event_type: + handle: yield readme: example_requests: @@ -738,7 +738,6 @@ resources: tool_calling_metadata_content: ToolCallingMetadataContent backend_format: BackendFormat finetuning_type: FinetuningType - inference_params: InferenceParams iam: standalone_api: true subresources: From 167f5c69652951ed3646be548d7b3e6c9ba43085 Mon Sep 17 00:00:00 2001 From: Sean Teramae Date: Thu, 11 Jun 2026 17:25:16 -0700 Subject: [PATCH 08/10] final lint Signed-off-by: Sean Teramae --- docs/cli/reference.mdx | 213 ++++++++++ .../cli/commands/api/inference/__init__.py | 2 + .../cli/commands/api/inference/prompts.py | 369 ++++++++++++++++++ .../cli/commands/api/inference/__init__.py | 2 + .../cli/commands/api/inference/prompts.py | 369 ++++++++++++++++++ 5 files changed, 955 insertions(+) create mode 100644 packages/nemo_platform_ext/src/nemo_platform_ext/cli/commands/api/inference/prompts.py create mode 100644 sdk/python/nemo-platform/src/nemo_platform/cli/commands/api/inference/prompts.py diff --git a/docs/cli/reference.mdx b/docs/cli/reference.mdx index ed519d408b..42011f745e 100644 --- a/docs/cli/reference.mdx +++ b/docs/cli/reference.mdx @@ -1391,6 +1391,7 @@ nemo inference [OPTIONS] COMMAND [ARGS]... * `deployments`: Manage deployments * `gateway`: Gateway operations * `models`: Manage models +* `prompts`: Manage prompts * `providers`: Manage providers * `virtual-models`: Manage virtual_models @@ -2856,6 +2857,218 @@ nemo inference models list [OPTIONS] * `--no-truncate`: Don't truncate long values in table/markdown/csv output. * `--output-columns, -c`: Columns to display: 'default', 'all', or comma-separated names. Only affects table/csv/markdown formats. +#### nemo inference prompts + +Manage prompts + +**Usage:** + +```shell +nemo inference prompts [OPTIONS] COMMAND [ARGS]... +``` + +**Help:** + +* `--help, -h`: Show this message and exit. + +**Commands:** + +* `create`: Create a new prompt. +* `delete`: Delete a prompt by workspace and name. +* `list`: List prompts for a specific workspace. +* `get`: Get a prompt by workspace and name. +* `update`: Update an existing prompt (full replacement of mutable... + +##### nemo inference prompts create + +Create a new prompt. + +**Required fields:** name + +**Examples:** + +```shell +nemo inference prompts create --input-file config.json +nemo inference prompts create --input-data '{"name": "value"}' +echo '{"json": "data"}' | nemo inference prompts create --input-file - +nemo inference prompts create --