From 1e69f08dc249bcffc9a31963b1bfd7be3d1177f6 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 08:59:31 -0700 Subject: [PATCH 01/15] Add JS isModelUpgradable and upgradeModel --- sdk/js/src/base.ts | 53 ++++++++++++++++++++++++++++++++++++++++++++- sdk/js/src/types.ts | 30 +++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index 60a6ecc0..c7f38c89 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -3,7 +3,7 @@ import * as client from './client.js' import { ExecutionProvider } from './types.js' -import type { DownloadBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' +import type { DownloadBody, UpgradeBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' /** * Utility function to detect if the platform is Windows. @@ -271,6 +271,57 @@ export class FoundryLocalManager { return modelInfo } + + /** + * Checks if a newer version of a model is available. + * @param {string} aliasOrModelId - The alias or model ID. + * @returns {Promise} True if a newer version is available, otherwise false. + */ + async isModelUpgradable(aliasOrModelId: string): Promise { + const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${aliasOrModelId}`) + const data = await response.json() + return data.upgradable + } + + /** + * Downloads the latest version of a model to the local cache. + * @param {string} aliasOrModelId - The alias or model ID. + * @param {string} [token] - Optional token for authentication. + * @param {(progress: number) => void} [onProgress] - Callback for download progress percentage. + * @returns {Promise} The upgraded model information. 
+ */ + async upgradeModel( + aliasOrModelId: string, + token?: string, + onProgress?: (progress: number) => void, + ): Promise { + const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo + + const upgradeBody: UpgradeBody = { + Name: modelInfo.id, + Uri: modelInfo.uri, + Publisher: modelInfo.publisher, + ProviderType: modelInfo.provider === 'AzureFoundry' ? `${modelInfo.provider}Local` : modelInfo.provider, + PromptTemplate: modelInfo.promptTemplate, + } + + const body = { + model: upgradeBody, + ...(token && { token }), + IgnorePipeReport: true, + } + + const data = await client.postWithProgress(this.fetch, `${this.serviceUrl}/openai/upgrade`, body, onProgress) + + if (!data.success) { + throw new Error( + `Failed to upgrade model with alias '${modelInfo.alias}' and ID '${modelInfo.id}': ${data.error}`, + ) + } + + return modelInfo + } + /** * Loads a model. * @param {string} aliasOrModelId - The alias or model ID. diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index ade00e1c..1467bac5 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -219,3 +219,33 @@ export interface DownloadBody { */ PromptTemplate: Record } + +/** + * Interface representing the body of an upgrade request. + */ +export interface UpgradeBody { + /** + * The name of the model. + */ + Name: string + + /** + * The URI of the model. + */ + Uri: string + + /** + * The publisher of the model. + */ + Publisher: string + + /** + * The provider type of the model. + */ + ProviderType: string + + /** + * The prompt template associated with the model. 
+ */ + PromptTemplate: Record +} From 0c73bc87cbf648076aba1294a0a9880bdb5e9b11 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 09:47:41 -0700 Subject: [PATCH 02/15] small updates to JS upgrade API --- sdk/js/src/base.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index c7f38c89..7c6d6adc 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -278,7 +278,9 @@ export class FoundryLocalManager { * @returns {Promise} True if a newer version is available, otherwise false. */ async isModelUpgradable(aliasOrModelId: string): Promise { - const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${aliasOrModelId}`) + const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo + + const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${modelInfo.id}`) const data = await response.json() return data.upgradable } @@ -301,7 +303,7 @@ export class FoundryLocalManager { Name: modelInfo.id, Uri: modelInfo.uri, Publisher: modelInfo.publisher, - ProviderType: modelInfo.provider === 'AzureFoundry' ? `${modelInfo.provider}Local` : modelInfo.provider, + ProviderType: modelInfo.provider === 'AzureFoundry' ? 
`AzureFoundryLocal` : modelInfo.provider, PromptTemplate: modelInfo.promptTemplate, } From a3ba62d64b8dc6e6415ae115a41cad2f3abc8793 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 09:48:07 -0700 Subject: [PATCH 03/15] Add Python upgrade_model and is_model_upgradable APIs --- sdk/python/foundry_local/api.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 92909e88..7d73765d 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -271,6 +271,62 @@ def download_model(self, alias_or_model_id: str, token: str | None = None, force ) return model_info + def is_model_upgradable(self, alias_or_model_id: str) -> bool: + """ + Check if a newer version of a model is available. + + Args: + alias_or_model_id (str): Alias or Model ID. + + Returns: + bool: True if a newer version is available, False otherwise. + + Raises: + ValueError: If the model is not found in the catalog. + """ + model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) + logger.info("Checking if model '%s' (ID: '%s') is upgradable...", model_info.alias, model_info.id) + + response = self.httpx_client.get(f"/foundry/upgradable/{model_info.id}") + return response.get("upgradable", False) + + def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> None: + """ + Download the latest version of a model to the local cache. + + Args: + alias_or_model_id (str): Alias or Model ID. + token (str | None): Optional token for authentication. + + Raises: + ValueError: If the model is not found in the catalog. + RuntimeError: If the model upgrade fails. 
+ """ + model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) + logger.info("Upgrading model with alias '%s' and ID '%s'...", model_info.alias, model_info.id) + + upgrade_body = { + "Name": model_info.id, + "Uri": model_info.uri, + "Publisher": model_info.publisher, + "ProviderType": "AzureFoundryLocal" if model_info.provider == "AzureFoundry" else model_info.provider, + "PromptTemplate": model_info.prompt_template, + } + body={ + "model": upgrade_body, + "token": token, + "IgnorePipeReport": True, + }, + + response_body = self.httpx_client.post_with_progress("/foundry/upgrade", body=body) + + if not response_body.get("success", False): + raise RuntimeError( + f"Failed to upgrade model with error: {response_body.get('errorMessage', 'Unknown error')}" + ) + + return model_info + def load_model(self, alias_or_model_id: str, ttl: int = 600) -> FoundryModelInfo: """ Load a model. From c6c73730cdd090503c093a670b5152ca136a30f9 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 17:28:51 -0700 Subject: [PATCH 04/15] consistent apis --- sdk/js/src/base.ts | 6 +++--- sdk/python/foundry_local/api.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index 7c6d6adc..4b2c0141 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -277,12 +277,12 @@ export class FoundryLocalManager { * @param {string} aliasOrModelId - The alias or model ID. * @returns {Promise} True if a newer version is available, otherwise false. 
*/ - async isModelUpgradable(aliasOrModelId: string): Promise { + async isModelUpgradeable(aliasOrModelId: string): Promise { const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo - const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${modelInfo.id}`) + const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradeable/${modelInfo.id}`) const data = await response.json() - return data.upgradable + return data.upgradeable } /** diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 7d73765d..3a1bec0f 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -271,7 +271,7 @@ def download_model(self, alias_or_model_id: str, token: str | None = None, force ) return model_info - def is_model_upgradable(self, alias_or_model_id: str) -> bool: + def is_model_upgradeable(self, alias_or_model_id: str) -> bool: """ Check if a newer version of a model is available. @@ -285,10 +285,10 @@ def is_model_upgradable(self, alias_or_model_id: str) -> bool: ValueError: If the model is not found in the catalog. 
""" model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) - logger.info("Checking if model '%s' (ID: '%s') is upgradable...", model_info.alias, model_info.id) + logger.info("Checking if model '%s' (ID: '%s') is upgradeable...", model_info.alias, model_info.id) - response = self.httpx_client.get(f"/foundry/upgradable/{model_info.id}") - return response.get("upgradable", False) + response = self.httpx_client.get(f"/openai/upgradeable/{model_info.id}") + return response.get("upgradeable", False) def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> None: """ @@ -318,7 +318,7 @@ def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> Non "IgnorePipeReport": True, }, - response_body = self.httpx_client.post_with_progress("/foundry/upgrade", body=body) + response_body = self.httpx_client.post_with_progress("/openai/upgrade", body=body) if not response_body.get("success", False): raise RuntimeError( From 2a296b3b89a4dfc6674230cb1a41464a918a6750 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 22:19:09 -0700 Subject: [PATCH 05/15] Add C# APIs --- sdk/cs/src/FoundryLocalManager.cs | 62 +++++++++++++++++++++++++++++++ sdk/cs/src/FoundryModelInfo.cs | 29 +++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index f5e3a132..b71cf9a1 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -245,6 +245,68 @@ public async Task> ListCachedModelsAsync(CancellationToken ct = return modelInfo; } + public async Task IsModelUpgradeableAsync(string aliasOrModelId, CancellationToken ct = default) + { + var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) + ?? 
throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); + + await StartServiceAsync(ct); + var response = await _serviceClient!.GetAsync($"/openai/upgradeable/{modelInfo.ModelId}", ct); + response.EnsureSuccessStatusCode(); + + var jsonResponse = await response.Content.ReadAsStringAsync(ct); + using var jsonDoc = JsonDocument.Parse(jsonResponse); + return jsonDoc.RootElement.GetProperty("upgradeable").GetBoolean(); + } + + public async Task UpgradeModelAsync( + string aliasOrModelId, + string? token = null, + CancellationToken ct = default) + { + var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) + ?? throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); + + var request = new UpgradeRequest + { + Model = new UpgradeRequest.UpgradeBody + { + Name = modelInfo.ModelId, + Uri = modelInfo.Uri, + Publisher = modelInfo.Publisher, + ProviderType = modelInfo.ProviderType == "AzureFoundry" ? "AzureFoundryLocal" : modelInfo.ProviderType, + PromptTemplate = modelInfo.PromptTemplate + }, + Token = token ?? 
"", + IgnorePipeReport = true + }; + + var response = await _serviceClient!.PostAsJsonAsync("/openai/upgrade", request, ct); + response.EnsureSuccessStatusCode(); + var responseBody = await response.Content.ReadAsStringAsync(ct); + + // Find the last '{' to get the start of the JSON object + var jsonStart = responseBody.LastIndexOf('{'); + if (jsonStart == -1) + { + throw new InvalidOperationException("No JSON object found in response."); + } + + var jsonPart = responseBody[jsonStart..]; + + // Parse the JSON part + using var jsonDoc = JsonDocument.Parse(jsonPart); + var success = jsonDoc.RootElement.GetProperty("success").GetBoolean(); + var errorMessage = jsonDoc.RootElement.GetProperty("errorMessage").GetString(); + + if (!success) + { + throw new InvalidOperationException($"Failed to download model: {errorMessage}"); + } + + return modelInfo; + } + public async Task LoadModelAsync(string aliasOrModelId, TimeSpan? timeout = null, CancellationToken ct = default) { var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) ?? 
throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); diff --git a/sdk/cs/src/FoundryModelInfo.cs b/sdk/cs/src/FoundryModelInfo.cs index e9f7c6fe..15b537ba 100644 --- a/sdk/cs/src/FoundryModelInfo.cs +++ b/sdk/cs/src/FoundryModelInfo.cs @@ -123,7 +123,36 @@ internal sealed class ModelInfo [JsonPropertyName("IgnorePipeReport")] public required bool IgnorePipeReport { get; set; } +} + +internal sealed class UpgradeRequest +{ + internal sealed class UpgradeBody + { + [JsonPropertyName("Name")] + public required string Name { get; set; } = string.Empty; + + [JsonPropertyName("Uri")] + public required string Uri { get; set; } = string.Empty; + + [JsonPropertyName("Publisher")] + public required string Publisher { get; set; } = string.Empty; + + [JsonPropertyName("ProviderType")] + public required string ProviderType { get; set; } = string.Empty; + [JsonPropertyName("PromptTemplate")] + public required PromptTemplate PromptTemplate { get; set; } + } + + [JsonPropertyName("model")] + public required UpgradeBody Model { get; set; } + + [JsonPropertyName("token")] + public required string Token { get; set; } + + [JsonPropertyName("IgnorePipeReport")] + public required bool IgnorePipeReport { get; set; } } public record ModelDownloadProgress From c8190b477779cc50b431590ec66da67bb415febf Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Wed, 9 Jul 2025 15:53:35 -0700 Subject: [PATCH 06/15] add rust APIs --- sdk/rust/src/api.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/sdk/rust/src/api.rs b/sdk/rust/src/api.rs index f212e578..0adabd29 100644 --- a/sdk/rust/src/api.rs +++ b/sdk/rust/src/api.rs @@ -397,6 +397,101 @@ impl FoundryLocalManager { Ok(model_info) } + /// Checks if a newer version of a model is available. + /// + /// # Arguments + /// + /// * `alias_or_model_id` - The alias or model ID to check for upgrades. 
+ /// + /// # Returns + /// + /// True if a newer version is available, otherwise false. + pub async fn is_model_upgradeable( + &mut self, + alias_or_model_id: &str + ) -> Result { + let model_info = self.get_model_info(alias_or_model_id, true).await?; + let url = format!("/openai/upgradeable/{}", model_info.id); + + let client = self.client()?; + let response: Option = client.get(&url, None).await?; + + let data = response.ok_or_else(|| anyhow!("Failed to check model upgrade availability"))?; + data["upgradeable"] + .as_bool() + .ok_or_else(|| anyhow!("Invalid upgrade response format")) + } + + /// Upgrades a model to its latest version. + /// + /// # Arguments + /// + /// * `alias_or_model_id` - The alias or model ID to upgrade. + /// * `token` - Optional token for authentication. + /// + /// # Returns + /// + /// The upgraded model information. + pub async fn upgrade_model( + &mut self, + alias_or_model_id: &str, + token: Option<&str>, + ) -> Result { + let model_info = self.get_model_info(alias_or_model_id, true).await?; + info!( + "Upgrading model: {} ({})", + model_info.alias, model_info.id + ); + + // Create the upgrade body similar to the JS implementation + let mut body = serde_json::json!({ + "model": { + "Name": model_info.id, + "Uri": model_info.uri, + "Publisher": model_info.publisher, + "ProviderType": if model_info.provider == "AzureFoundry" { + "AzureFoundryLocal" + } else { + model_info.provider + }, + "PromptTemplate": model_info.prompt_template, + }, + "IgnorePipeReport": true + }); + + // Add token if provided + if let Some(t) = token { + body["token"] = Value::String(t.to_string()); + } + + let client = self.client()?; + let response: Value = client + .post_with_progress("/openai/upgrade", Some(body)) + .await?; + + // Check if the upgrade was successful + if !response["success"].as_bool().unwrap_or(false) { + let error_msg = response["error"] + .as_str() + .unwrap_or("Unknown error"); + + return Err(anyhow!( + "Failed to upgrade model with 
alias '{}' and ID '{}': {}", + model_info.alias, + model_info.id, + error_msg + )); + } + + // Refresh the model cache to get the latest information + self.refresh_catalog(); + + // Get the updated model information + let updated_model_info = self.get_model_info(&model_info.id, true).await?; + + Ok(updated_model_info) + } + /// Load a model. /// /// # Arguments From 075e3ed2e2f61a269130697fa93b5a54bcfc6597 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 17:10:04 -0700 Subject: [PATCH 07/15] update python API to support versioning --- sdk/python/foundry_local/__init__.py | 2 +- sdk/python/foundry_local/api.py | 127 +++++++++++++++------ sdk/python/foundry_local/models.py | 2 + sdk/python/test/test_api.py | 161 +++++++++++++++++++-------- sdk/python/test/test_models.py | 4 + 5 files changed, 216 insertions(+), 80 deletions(-) diff --git a/sdk/python/foundry_local/__init__.py b/sdk/python/foundry_local/__init__.py index 2a0a71ff..b3681488 100644 --- a/sdk/python/foundry_local/__init__.py +++ b/sdk/python/foundry_local/__init__.py @@ -20,4 +20,4 @@ __all__ = ["FoundryLocalManager"] -__version__ = "0.3.1" +__version__ = "0.4.0" diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 3a1bec0f..afbd73d0 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -133,6 +133,18 @@ def list_catalog_models(self) -> list[FoundryModelInfo]: ] return self._catalog_list + """ + Extract numeric version from ID (e.g. model-x:3 → 3) + + Returns: + int: Numeric version extracted from the model ID, or -1 if not found. + """ + def _get_version(self, model_id: str) -> int: + try: + return int(model_id.split(":")[-1]) + except (ValueError, IndexError): + return -1 + def _get_catalog_dict(self) -> dict[str, FoundryModelInfo]: """ Get a dictionary of available models. Keyed by model ID and alias. Alias points to the most preferred model. 
@@ -169,6 +181,17 @@ def _get_catalog_dict(self) -> dict[str, FoundryModelInfo]: for alias, candidates in alias_candidates.items(): self._catalog_dict[alias] = min(candidates, key=lambda model: priority_map.get(model.runtime, float("inf"))) + # Choose the best model for each alias based on priority and version + for alias, candidates in alias_candidates.items(): + best_candidate = max( + candidates, + key=lambda m: ( + -priority_map.get(m.runtime, float("inf")), # negate to mimic ascending priority + self._get_version(m.id) # pick the highest version + ) + ) + self._catalog_dict[alias] = best_candidate + return self._catalog_dict def refresh_catalog(self): @@ -178,10 +201,11 @@ def refresh_catalog(self): def get_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = False) -> FoundryModelInfo | None: """ - Get the model information by alias or ID. + Get the model information by of the latest model that matches the given alias or ID. Args: alias_or_model_id (str): Alias or Model ID. If it is an alias, the most preferred model will be returned. + If it is a model ID, it can contain a ":" suffix or not. raise_on_not_found (bool): If True, raise an error if the model is not found. Default is False. Returns: @@ -190,11 +214,62 @@ def get_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = Fals Raises: ValueError: If the model is not found and raise_on_not_found is True. 
""" - model_info = self._get_catalog_dict().get(alias_or_model_id) + catalog = self._get_catalog_dict() + model_info = None + + # id with version, or alias + if alias_or_model_id in catalog: + model_info = catalog[alias_or_model_id] + elif ":" not in alias_or_model_id: + # alias_or_model_id is an id that does not contain a version + prefix = f"{alias_or_model_id}:" + best_version = -1 + + for key, info in catalog.items(): + if key.startswith(prefix): + try: + version = self._get_version(key) + if version > best_version: + best_version = version + model_info = info + except ValueError: + continue # Skip if version is not numeric + if model_info is None and raise_on_not_found: raise ValueError(f"Model {alias_or_model_id} not found in the catalog.") return model_info + def _get_latest_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = False) -> FoundryModelInfo | None: + """ + Get the latest model information by alias or model ID. + The difference from get_model_info is that this method will return the latest version of the model + even when you pass it a model id that contains a version suffix. + + Args: + alias_or_model_id (str): Alias or Model ID. If it is an alias, the most preferred model will be returned. + raise_on_not_found (bool): If True, raise an error if the model is not found. Default is False. + + Returns: + FoundryModelInfo | None: Latest model information or None if not found. + + Raises: + ValueError: If the model is not found and raise_on_not_found is True. 
+ """ + catalog = self._get_catalog_dict() + + # if alias or id without version + if ":" not in alias_or_model_id: + # if alias + if catalog[alias_or_model_id] is not None: + return catalog[alias_or_model_id] + else: + # if id without version, then get_model_info will get the latest version + return self.get_model_info(alias_or_model_id, raise_on_not_found) + else: + # if id with version, remove the ":" suffix and use the name to get the latest model + id_without_version = alias_or_model_id.split(":")[0] + return self.get_model_info(id_without_version, raise_on_not_found) + # Cache management api def get_cache_location(self): """ @@ -284,48 +359,34 @@ def is_model_upgradeable(self, alias_or_model_id: str) -> bool: Raises: ValueError: If the model is not found in the catalog. """ - model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) - logger.info("Checking if model '%s' (ID: '%s') is upgradeable...", model_info.alias, model_info.id) + logger.info("Checking if model '%s' is upgradeable...", alias_or_model_id) + model_info = self._get_latest_model_info(alias_or_model_id, raise_on_not_found=True) + if model_info is None: + return False # Model not found in the catalog + + latest_version = self._get_version(model_info.id) + if latest_version == -1: + return False # Invalid model ID format - response = self.httpx_client.get(f"/openai/upgradeable/{model_info.id}") - return response.get("upgradeable", False) + cached_models = self.list_cached_models() + for cached_model in cached_models: + if cached_model.id == model_info.id and self._get_version(cached_model.id) == latest_version: + return False # Model is already the latest version + + return True # The latest version is not in the cache def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> None: """ - Download the latest version of a model to the local cache. - + Download the latest version of a model to the local cache, if the latest version is not already cached. 
Args: alias_or_model_id (str): Alias or Model ID. token (str | None): Optional token for authentication. - Raises: ValueError: If the model is not found in the catalog. RuntimeError: If the model upgrade fails. """ - model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) - logger.info("Upgrading model with alias '%s' and ID '%s'...", model_info.alias, model_info.id) - - upgrade_body = { - "Name": model_info.id, - "Uri": model_info.uri, - "Publisher": model_info.publisher, - "ProviderType": "AzureFoundryLocal" if model_info.provider == "AzureFoundry" else model_info.provider, - "PromptTemplate": model_info.prompt_template, - } - body={ - "model": upgrade_body, - "token": token, - "IgnorePipeReport": True, - }, - - response_body = self.httpx_client.post_with_progress("/openai/upgrade", body=body) - - if not response_body.get("success", False): - raise RuntimeError( - f"Failed to upgrade model with error: {response_body.get('errorMessage', 'Unknown error')}" - ) - - return model_info + model_info = self._get_latest_model_info(alias_or_model_id, raise_on_not_found=True) + return self.download_model(model_info.id, token=token) def load_model(self, alias_or_model_id: str, ttl: int = 600) -> FoundryModelInfo: """ diff --git a/sdk/python/foundry_local/models.py b/sdk/python/foundry_local/models.py index 5168adaa..c4018c07 100644 --- a/sdk/python/foundry_local/models.py +++ b/sdk/python/foundry_local/models.py @@ -78,6 +78,8 @@ class FoundryListResponseModel(BaseModel): license: str = Field(..., description="License of the model") licenseDescription: str = Field(..., description="License description of the model") parentModelUri: str = Field(..., description="Parent model URI of the model") + maxOutputTokens: int = Field(..., description="Maximum output tokens for the model") + minFLVersion: str = Field(..., description="Minimum Foundry Local version required for the model") class FoundryModelInfo(BaseModel): diff --git a/sdk/python/test/test_api.py 
b/sdk/python/test/test_api.py index 707941af..a301a2fc 100644 --- a/sdk/python/test/test_api.py +++ b/sdk/python/test/test_api.py @@ -24,13 +24,15 @@ "supportsToolCalling": False, "license": "MIT", "licenseDescription": "This model is provided under the License Terms available at ...", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0", } # Sample catalog with 3 aliases with different combos MOCK_CATALOG_DATA = [ # generic-gpu, generic-cpu { - "name": "model-1-generic-gpu", + "name": "model-1-generic-gpu:1", "displayName": "model-1-generic-gpu", "uri": "azureml://registries/azureml/models/model-1-generic-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "WebGpuExecutionProvider"}, @@ -39,7 +41,7 @@ **MOCK_INFO, }, { - "name": "model-1-generic-cpu", + "name": "model-1-generic-cpu:1", "displayName": "model-1-generic-cpu", "uri": "azureml://registries/azureml/models/model-1-generic-cpu/versions/1", "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, @@ -47,9 +49,18 @@ "parentModelUri": "azureml://registries/azureml/models/model-1/versions/1", **MOCK_INFO, }, + { + "name": "model-1-generic-cpu:2", + "displayName": "model-1-generic-cpu", + "uri": "azureml://registries/azureml/models/model-1-generic-cpu/versions/2", + "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, + "alias": "model-1", + "parentModelUri": "azureml://registries/azureml/models/model-1/versions/2", + **MOCK_INFO, + }, # npu, generic-cpu { - "name": "model-2-npu", + "name": "model-2-npu:1", "displayName": "model-2-npu", "uri": "azureml://registries/azureml/models/model-2-npu/versions/1", "runtime": {"deviceType": "NPU", "executionProvider": "QNNExecutionProvider"}, @@ -58,7 +69,16 @@ **MOCK_INFO, }, { - "name": "model-2-generic-cpu", + "name": "model-2-npu:2", + "displayName": "model-2-npu", + "uri": "azureml://registries/azureml/models/model-2-npu/versions/2", + "runtime": {"deviceType": "NPU", "executionProvider": 
"QNNExecutionProvider"}, + "alias": "model-2", + "parentModelUri": "azureml://registries/azureml/models/model-2/versions/2", + **MOCK_INFO, + }, + { + "name": "model-2-generic-cpu:1", "displayName": "model-2-generic-cpu", "uri": "azureml://registries/azureml/models/model-2-generic-cpu/versions/1", "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, @@ -68,7 +88,7 @@ }, # cuda-gpu, generic-gpu, generic-cpu { - "name": "model-3-cuda-gpu", + "name": "model-3-cuda-gpu:1", "displayName": "model-3-cuda-gpu", "uri": "azureml://registries/azureml/models/model-3-cuda-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "CUDAExecutionProvider"}, @@ -77,7 +97,7 @@ **MOCK_INFO, }, { - "name": "model-3-generic-gpu", + "name": "model-3-generic-gpu:1", "displayName": "model-3-generic-gpu", "uri": "azureml://registries/azureml/models/model-3-generic-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "WebGpuExecutionProvider"}, @@ -86,7 +106,7 @@ **MOCK_INFO, }, { - "name": "model-3-generic-cpu", + "name": "model-3-generic-cpu:1", "displayName": "model-3-generic-cpu", "uri": "azureml://registries/azureml/models/model-3-generic-cpu/versions/1", "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, @@ -96,7 +116,7 @@ }, # generic-cpu { - "name": "model-4-generic-gpu", + "name": "model-4-generic-gpu:1", "displayName": "model-4-generic-gpu", "uri": "azureml://registries/azureml/models/model-4-generic-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "WebGpuExecutionProvider"}, @@ -110,10 +130,10 @@ MOCK_STATUS_RESPONSE = {"modelDirPath": "/test/path/to/models"} # Mock response for /openai/models -MOCK_LOCAL_MODELS = ["model-2-npu", "model-4-generic-gpu"] +MOCK_LOCAL_MODELS = ["model-2-npu:1", "model-4-generic-gpu:1"] # Mock response for /openai/loadedmodels -MOCK_LOADED_MODELS = ["model-2-npu"] +MOCK_LOADED_MODELS = ["model-2-npu:1"] @pytest.fixture(scope="module", 
autouse=True) @@ -174,7 +194,7 @@ def test_initialization(mock_http_client): # Test with bootstrap and model_id with mock.patch("foundry_local.api.start_service") as mock_start: mock_start.return_value = "http://localhost:5272" - manager = FoundryLocalManager(alias_or_model_id="model-2", bootstrap=True) + manager = FoundryLocalManager(alias_or_model_id="model-4", bootstrap=True) mock_start.assert_called_once() mock_http_client.get.assert_any_call("/foundry/list") # in local models @@ -186,21 +206,25 @@ def test_list_catalog_models(mock_http_client): manager = FoundryLocalManager(bootstrap=False) models = manager.list_catalog_models() mock_http_client.get.assert_called_once_with("/foundry/list") - assert len(models) == 8 + assert len(models) == len(MOCK_CATALOG_DATA) assert all(isinstance(model, FoundryModelInfo) for model in models) assert [model.id for model in models] == [ - "model-1-generic-gpu", - "model-1-generic-cpu", - "model-2-npu", - "model-2-generic-cpu", - "model-3-cuda-gpu", - "model-3-generic-gpu", - "model-3-generic-cpu", - "model-4-generic-gpu", + "model-1-generic-gpu:1", + "model-1-generic-cpu:1", + "model-1-generic-cpu:2", + "model-2-npu:1", + "model-2-npu:2", + "model-2-generic-cpu:1", + "model-3-cuda-gpu:1", + "model-3-generic-gpu:1", + "model-3-generic-cpu:1", + "model-4-generic-gpu:1", ] assert [model.alias for model in models] == [ "model-1", "model-1", + "model-1", + "model-2", "model-2", "model-2", "model-3", @@ -231,18 +255,20 @@ def test_get_model_info(platform, mock_http_client): with pytest.raises(ValueError): manager.get_model_info("unknown-model", raise_on_not_found=True) - # with id - assert manager.get_model_info("model-1-generic-cpu").id == "model-1-generic-cpu" + # with id that contains version + assert manager.get_model_info("model-1-generic-cpu:1").id == "model-1-generic-cpu:1" + + # with id that does not contain version + assert manager.get_model_info("model-1-generic-cpu").id == "model-1-generic-cpu:2" # with alias # 
generic-cpu preferred on Windows assert ( - manager.get_model_info("model-1").id == "model-1-generic-cpu" - if platform == "Windows" - else "model-1-generic-gpu" + manager.get_model_info("model-1").id == "model-1-generic-cpu:2" if platform == "Windows" + else "model-1-generic-gpu:1" ) - assert manager.get_model_info("model-2").id == "model-2-npu" - assert manager.get_model_info("model-3").id == "model-3-cuda-gpu" + assert manager.get_model_info("model-2").id == "model-2-npu:2" # latest version, even if not in cache + assert manager.get_model_info("model-3").id == "model-3-cuda-gpu:1" def test_list_cached_models(mock_http_client): @@ -250,8 +276,8 @@ def test_list_cached_models(mock_http_client): manager = FoundryLocalManager(bootstrap=False) local_models = manager.list_cached_models() assert len(local_models) == 2 - assert local_models[0].id == "model-2-npu" - assert local_models[1].id == "model-4-generic-gpu" + assert local_models[0].id == "model-2-npu:1" + assert local_models[1].id == "model-4-generic-gpu:1" def test_list_loaded_models(mock_http_client): @@ -259,7 +285,7 @@ def test_list_loaded_models(mock_http_client): manager = FoundryLocalManager(bootstrap=False) loaded_models = manager.list_loaded_models() assert len(loaded_models) == 1 - assert loaded_models[0].id == "model-2-npu" + assert loaded_models[0].id == "model-2-npu:1" def test_download_model(mock_http_client): @@ -268,20 +294,24 @@ def test_download_model(mock_http_client): # Test downloading a new model model_info = manager.download_model("model-3") - assert model_info.id == "model-3-cuda-gpu" + assert model_info.id == "model-3-cuda-gpu:1" mock_http_client.post_with_progress.assert_called_once() - - # Reset mock for next test - mock_http_client.post_with_progress.reset_mock() + mock_http_client.post_with_progress.reset_mock() # Reset mock for next test # Test downloading an already cached model - model_info = manager.download_model("model-2") - assert model_info.id == "model-2-npu" + model_info = 
manager.download_model("model-2-npu:1")
+    assert model_info.id == "model-2-npu:1"
     mock_http_client.post_with_progress.assert_not_called()
 
+    # Test download a model that is not at the latest version
+    model_info = manager.download_model("model-2")
+    assert model_info.id == "model-2-npu:2"
+    mock_http_client.post_with_progress.assert_called_once()
+    mock_http_client.post_with_progress.reset_mock()  # Reset mock for next test
+
     # Test force download
     model_info = manager.download_model("model-2", force=True)
-    assert model_info.id == "model-2-npu"
+    assert model_info.id == "model-2-npu:2"
     mock_http_client.post_with_progress.assert_called_once()
 
     # Test download failure
@@ -290,20 +320,59 @@ def test_download_model(mock_http_client):
             manager.download_model("model-1")
 
 
+def test_is_model_upgradeable(mock_http_client):
+    manager = FoundryLocalManager(bootstrap=False)
+
+    # Not in cache, even if at the latest version
+    assert manager.is_model_upgradeable("model-1") is True
+    assert manager.is_model_upgradeable("model-1-generic-cpu:1") is True
+    assert manager.is_model_upgradeable("model-1-generic-cpu:2") is True
+    assert manager.is_model_upgradeable("model-1-generic-gpu:1") is True
+    assert manager.is_model_upgradeable("model-2-npu:2") is True
+
+    # In cache, at the latest version
+    assert manager.is_model_upgradeable("model-4-generic-gpu:1") is False
+
+    # In cache, not at the latest version
+    assert manager.is_model_upgradeable("model-2-npu:1") is True
+
+
+def test_upgrade_model(mock_http_client):
+    """Test upgrading a model."""
+    manager = FoundryLocalManager(bootstrap=False)
+
+    # Test upgrading a model that is not in the cache at all
+    model_info = manager.upgrade_model("model-3")
+    assert model_info.id == "model-3-cuda-gpu:1"
+    mock_http_client.post_with_progress.assert_called_once()
+    mock_http_client.post_with_progress.reset_mock()  # Reset mock for next test
+
+    # Test upgrading a model that has an older version in the cache
+    model_info = 
manager.upgrade_model("model-2-npu:1") + assert model_info.id == "model-2-npu:2" + mock_http_client.post_with_progress.assert_called_once() + mock_http_client.post_with_progress.reset_mock() # Reset mock for next test + + # Test upgrading a model that has the latest version in the cache + model_info = manager.upgrade_model("model-4") + assert model_info.id == "model-4-generic-gpu:1" + mock_http_client.post_with_progress.assert_not_called() + + def test_load_model(mock_http_client): """Test loading a model.""" manager = FoundryLocalManager(bootstrap=False) # already loaded model model_info = manager.load_model("model-2") - assert model_info.id == "model-2-npu" - mock_http_client.get.assert_any_call("/openai/load/model-2-npu", query_params={"ttl": 600}) + assert model_info.id == "model-2-npu:2" + mock_http_client.get.assert_any_call("/openai/load/model-2-npu:2", query_params={"ttl": 600}) # not loaded model model_info = manager.load_model("model-4") - assert model_info.id == "model-4-generic-gpu" + assert model_info.id == "model-4-generic-gpu:1" # ep override, should be cuda since there is cuda support - mock_http_client.get.assert_any_call("/openai/load/model-4-generic-gpu", query_params={"ttl": 600, "ep": "cuda"}) + mock_http_client.get.assert_any_call("/openai/load/model-4-generic-gpu:1", query_params={"ttl": 600, "ep": "cuda"}) # Test loading a non-downloaded model def mock_get(path, query_params=None): @@ -319,17 +388,17 @@ def test_unload_model(mock_http_client): manager = FoundryLocalManager(bootstrap=False) # Test unloading a loaded model - manager.unload_model("model-2") - mock_http_client.get.assert_any_call("/openai/unload/model-2-npu", query_params={"force": False}) + manager.unload_model("model-2-npu:1") + mock_http_client.get.assert_any_call("/openai/unload/model-2-npu:1", query_params={"force": False}) # Test unloading a model that's not loaded mock_http_client.get.reset_mock() manager.unload_model("model-4") assert ( - 
mock.call("/openai/unload/model-4-generic-gpu", query_params={"force": False}) + mock.call("/openai/unload/model-4-generic-gpu:1", query_params={"force": False}) not in mock_http_client.get.call_args_list ) # Test force unloading - manager.unload_model("model-2", force=True) - mock_http_client.get.assert_any_call("/openai/unload/model-2-npu", query_params={"force": True}) + manager.unload_model("model-2-npu:1", force=True) + mock_http_client.get.assert_any_call("/openai/unload/model-2-npu:1", query_params={"force": True}) diff --git a/sdk/python/test/test_models.py b/sdk/python/test/test_models.py index b1feca29..a6d8a779 100644 --- a/sdk/python/test/test_models.py +++ b/sdk/python/test/test_models.py @@ -54,6 +54,8 @@ def test_foundry_list_response_model(): "license": "MIT", "licenseDescription": "Test license", "parentModelUri": "azureml://parent", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0" } model = FoundryListResponseModel.model_validate(response_data) @@ -134,6 +136,8 @@ def test_from_list_response(): "license": "MIT", "licenseDescription": "Test license", "parentModelUri": "azureml://parent", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0", } # Test with dict From 88e94c678ce456cef37f56efb8627bc2d3672619 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 19:06:23 -0700 Subject: [PATCH 08/15] remove redundant code --- sdk/python/foundry_local/api.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index afbd73d0..56a736a4 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -177,10 +177,6 @@ def _get_catalog_dict(self) -> dict[str, FoundryModelInfo]: priority_map = {provider: index for index, provider in enumerate(preferred_order)} - # Choose the preferred model for each alias - for alias, candidates in alias_candidates.items(): - self._catalog_dict[alias] = min(candidates, key=lambda model: 
priority_map.get(model.runtime, float("inf")))
-
         # Choose the best model for each alias based on priority and version
         for alias, candidates in alias_candidates.items():
             best_candidate = max(
@@ -201,7 +197,7 @@ def refresh_catalog(self):
 
     def get_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = False) -> FoundryModelInfo | None:
         """
-        Get the model information by of the latest model that matches the given alias or ID.
+        Get the model information of the latest model that matches the given alias or ID.
 
         Args:
             alias_or_model_id (str): Alias or Model ID. If it is an alias, the most preferred model will be returned.
@@ -255,6 +251,11 @@ def _get_latest_model_info(self, alias_or_model_id: str, raise_on_not_found: boo
         Raises:
             ValueError: If the model is not found and raise_on_not_found is True.
         """
+        if not alias_or_model_id:
+            if raise_on_not_found:
+                raise ValueError("The provided model alias or ID was empty.")
+            return None
+
         catalog = self._get_catalog_dict()
 
         # if alias or id without version
@@ -371,7 +372,7 @@ def is_model_upgradeable(self, alias_or_model_id: str) -> bool:
         cached_models = self.list_cached_models()
         for cached_model in cached_models:
             if cached_model.id == model_info.id and self._get_version(cached_model.id) == latest_version:
-                return False  # Model is already the latest version
+                return False  # Cached model is already at the latest version
 
         return True  # The latest version is not in the cache
 
From ff703993688ef3205bf817579bdc16201e27fc54 Mon Sep 17 00:00:00 2001
From: Alex Marin
Date: Sun, 27 Jul 2025 20:08:49 -0700
Subject: [PATCH 09/15] update JS API to support versioning

---
 sdk/js/package-lock.json |   4 +-
 sdk/js/package.json      |   2 +-
 sdk/js/src/base.ts       | 143 ++++++++++++++++++-------
 sdk/js/src/types.ts      |  40 ++-----
 sdk/js/test/base.test.ts | 219 +++++++++++++++++++++++++++++++++++----
 5 files changed, 314 insertions(+), 94 deletions(-)

diff --git a/sdk/js/package-lock.json b/sdk/js/package-lock.json
index 46a7cd26..bfd3da47 100644
--- a/sdk/js/package-lock.json +++ b/sdk/js/package-lock.json @@ -1,12 +1,12 @@ { "name": "foundry-local-sdk", - "version": "0.3.0", + "version": "0.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "foundry-local-sdk", - "version": "0.3.0", + "version": "0.4.0", "license": "MIT", "dependencies": { "whatwg-fetch": "^3.6.20" diff --git a/sdk/js/package.json b/sdk/js/package.json index e1999e8e..3bb2f510 100644 --- a/sdk/js/package.json +++ b/sdk/js/package.json @@ -1,6 +1,6 @@ { "name": "foundry-local-sdk", - "version": "0.3.1", + "version": "0.4.0", "description": "Foundry Local Manager Javascript SDK", "repository": { "type": "git", diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index 4b2c0141..7fcf70de 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -3,7 +3,7 @@ import * as client from './client.js' import { ExecutionProvider } from './types.js' -import type { DownloadBody, UpgradeBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' +import type { DownloadBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' /** * Utility function to detect if the platform is Windows. @@ -112,6 +112,20 @@ export class FoundryLocalManager { return this.catalogList } + /** + * Extracts numeric version from ID (e.g. model-x:3 → 3) + * @returns {number} Numeric version extracted from the model ID, or -1 if not found. + */ + private getVersion(modelId: string): number { + try { + const versionStr = modelId.split(":")[1]; + const version = parseInt(versionStr, 10); + return isNaN(version) ? -1 : version; + } catch { + return -1; + } + } + /** * Gets the catalog record. * @returns {Promise>} The catalog record. @@ -149,12 +163,18 @@ export class FoundryLocalManager { // Choose the preferred model for each alias Object.entries(aliasCandidates).forEach(([alias, candidates]) => { const bestCandidate = candidates.reduce((best, current) => { - const bestPriority = priorityMap.get(best.runtime) ?? 
Infinity - const currentPriority = priorityMap.get(current.runtime) ?? Infinity - return currentPriority < bestPriority ? current : best + const bestPriority = -(priorityMap.get(best.runtime) ?? Infinity) + const currentPriority = -(priorityMap.get(current.runtime) ?? Infinity) + + const bestVersion = this.getVersion(best.id) + const currentVersion = this.getVersion(current.id) + + if (currentPriority > bestPriority || (currentPriority === bestPriority && currentVersion > bestVersion)) { + return current + } + return best }) - // Explicitly assign the best candidate to avoid null/undefined issues if (this.catalogRecord) { this.catalogRecord[alias] = bestCandidate } @@ -173,18 +193,73 @@ export class FoundryLocalManager { } /** - * Gets model information by alias or ID. + * Gets the model information of the latest model that matches the given alias or ID. * @param {string} aliasOrModelId - The alias or model ID. * @param {boolean} throwOnNotFound - Whether to throw an error if the model is not found. * @returns {Promise} The model information or null if not found. 
*/ async getModelInfo(aliasOrModelId: string, throwOnNotFound = false): Promise { - const catalogRecord = await this.getCatalogRecord() - const modelInfo = catalogRecord[aliasOrModelId] + const catalog = await this.getCatalogRecord() + let modelInfo: FoundryModelInfo | null = null; + + // Exact match (ID with version or alias) + if (aliasOrModelId in catalog) { + modelInfo = catalog[aliasOrModelId]; + } else if (!aliasOrModelId.includes(":")) { + // ID without version — find the latest version + const prefix = `${aliasOrModelId}:`; + let bestVersion = -1; + + for (const key of Object.keys(catalog)) { + if (key.startsWith(prefix)) { + const version = this.getVersion(key); + if (version > bestVersion) { + bestVersion = version; + modelInfo = catalog[key]; + } + } + } + } + if (!modelInfo && throwOnNotFound) { - throw new Error(`Model with alias or ID ${aliasOrModelId} not found in the catalog`) + throw new Error(`Model with alias or ID '${aliasOrModelId}' not found in the catalog`); } - return modelInfo ?? null + + return modelInfo; + } + + /** + * Gets the latest model information by alias or model ID. + * The difference from getModelInfo is that this method will return the latest version of the model + * even when you pass it a model id that contains a version suffix. + * @param {string} aliasOrModelId - The alias or model ID. + * @param {boolean} throwOnNotFound - Whether to throw an error if the model is not found. + * @returns {Promise} The model information or null if not found. 
+ */ + private async getLatestModelInfo(aliasOrModelId: string, throwOnNotFound = false): Promise { + if (!aliasOrModelId) { + if (throwOnNotFound) { + throw new Error('The provided model alias or ID was empty.'); + } + return null; + } + + const catalog = await this.getCatalogRecord(); + + // alias or ID without version + if (!aliasOrModelId.includes(":")) { + const model = catalog[aliasOrModelId]; + if (model) { + return model; + } + + // if ID without version, then getModelInfo will get the latest version + return await this.getModelInfo(aliasOrModelId, throwOnNotFound); + } + + // if ID with version, remove the ":" suffix and use the name to get the latest model + const idWithoutVersion = aliasOrModelId.split(":")[0]; + return await this.getModelInfo(idWithoutVersion, throwOnNotFound); } /** @@ -278,11 +353,24 @@ export class FoundryLocalManager { * @returns {Promise} True if a newer version is available, otherwise false. */ async isModelUpgradeable(aliasOrModelId: string): Promise { - const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo + const modelInfo = await this.getLatestModelInfo(aliasOrModelId, true); + if (!modelInfo) { + return false; // Model not found in the catalog + } - const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradeable/${modelInfo.id}`) - const data = await response.json() - return data.upgradeable + const latestVersion = this.getVersion(modelInfo.id); + if (latestVersion === -1) { + return false; // Invalid version format + } + + const cachedModels = await this.listCachedModels(); + for (const cached of cachedModels) { + if (cached.id === modelInfo.id && this.getVersion(cached.id) === latestVersion) { + return false; // Cached model is already at the latest version + } + } + + return true; // The latest version is not in the cache } /** @@ -297,31 +385,8 @@ export class FoundryLocalManager { token?: string, onProgress?: (progress: number) => void, ): Promise { - const modelInfo = 
(await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo - - const upgradeBody: UpgradeBody = { - Name: modelInfo.id, - Uri: modelInfo.uri, - Publisher: modelInfo.publisher, - ProviderType: modelInfo.provider === 'AzureFoundry' ? `AzureFoundryLocal` : modelInfo.provider, - PromptTemplate: modelInfo.promptTemplate, - } - - const body = { - model: upgradeBody, - ...(token && { token }), - IgnorePipeReport: true, - } - - const data = await client.postWithProgress(this.fetch, `${this.serviceUrl}/openai/upgrade`, body, onProgress) - - if (!data.success) { - throw new Error( - `Failed to upgrade model with alias '${modelInfo.alias}' and ID '${modelInfo.id}': ${data.error}`, - ) - } - - return modelInfo + const modelInfo = await this.getLatestModelInfo(aliasOrModelId, true) as FoundryModelInfo; + return this.downloadModel(modelInfo.id, token, false, onProgress) } /** diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index 1467bac5..8d4e852a 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -128,6 +128,16 @@ export interface FoundryListResponseModel { * The URI of the parent model. */ parentModelUri: string + + /** + * The maximum number of output tokens. + */ + maxOutputTokens: number + + /** + * The minimum Foundry Local version required to use this model. + */ + minFLVersion: string } /** @@ -219,33 +229,3 @@ export interface DownloadBody { */ PromptTemplate: Record } - -/** - * Interface representing the body of an upgrade request. - */ -export interface UpgradeBody { - /** - * The name of the model. - */ - Name: string - - /** - * The URI of the model. - */ - Uri: string - - /** - * The publisher of the model. - */ - Publisher: string - - /** - * The provider type of the model. - */ - ProviderType: string - - /** - * The prompt template associated with the model. 
- */ - PromptTemplate: Record -} diff --git a/sdk/js/test/base.test.ts b/sdk/js/test/base.test.ts index 00508415..8a5ab221 100644 --- a/sdk/js/test/base.test.ts +++ b/sdk/js/test/base.test.ts @@ -60,7 +60,7 @@ describe('FoundryLocalManager', () => { const mockResponse = { json: vi.fn().mockResolvedValue([ { - name: 'model_name', + name: 'model_name:1', displayName: 'model_name', modelType: 'ONNX', providerType: 'AzureFoundry', @@ -77,6 +77,8 @@ describe('FoundryLocalManager', () => { license: 'MIT', licenseDescription: 'This model is provided under the License Terms available at ...', parentModelUri: 'azureml://registries/azureml/models/model_parent/versions/1', + maxOutputTokens: 1024, + minFLVersion: '1.0.0', }, ]), } @@ -89,7 +91,7 @@ describe('FoundryLocalManager', () => { expect(models).toHaveLength(1) expect(models[0]).toEqual({ alias: 'model_alias', - id: 'model_name', + id: 'model_name:1', version: '1', runtime: ExecutionProvider.CPU, uri: 'azureml://registries/azureml/models/model_name/versions/1', @@ -107,7 +109,7 @@ describe('FoundryLocalManager', () => { manager['catalogList'] = [ { alias: 'model_alias', - id: 'model_name', + id: 'model_name:1', version: '1', runtime: ExecutionProvider.CPU, uri: 'azureml://registries/azureml/models/model_name/versions/1', @@ -146,45 +148,55 @@ describe('FoundryLocalManager', () => { manager['catalogList'] = [ // eneric-gpu, generic-cpu { - id: 'model-1-generic-gpu', + id: 'model-1-generic-gpu:1', runtime: ExecutionProvider.WEBGPU, alias: 'model-1', } as any, { - id: 'model-1-generic-cpu', + id: 'model-1-generic-cpu:1', + runtime: ExecutionProvider.CPU, + alias: 'model-1', + }, + { + id: 'model-1-generic-cpu:2', runtime: ExecutionProvider.CPU, alias: 'model-1', }, // npu, generic-cpu { - id: 'model-2-npu', + id: 'model-2-npu:1', runtime: ExecutionProvider.QNN, alias: 'model-2', }, { - id: 'model-2-generic-cpu', + id: 'model-2-npu:2', + runtime: ExecutionProvider.QNN, + alias: 'model-2', + }, + { + id: 
'model-2-generic-cpu:1', runtime: ExecutionProvider.CPU, alias: 'model-2', }, // cuda-gpu, generic-gpu, generic-cpu { - id: 'model-3-cuda-gpu', + id: 'model-3-cuda-gpu:1', runtime: ExecutionProvider.CUDA, alias: 'model-3', }, { - id: 'model-3-generic-gpu', + id: 'model-3-generic-gpu:1', runtime: ExecutionProvider.WEBGPU, alias: 'model-3', }, { - id: 'model-3-generic-cpu', + id: 'model-3-generic-cpu:1', runtime: ExecutionProvider.CPU, alias: 'model-3', }, // generic-cpu { - id: 'model-4-generic-cpu', + id: 'model-4-generic-cpu:1', runtime: ExecutionProvider.CPU, alias: 'model-4', }, @@ -192,26 +204,26 @@ describe('FoundryLocalManager', () => { }) it('should return model info by id', async () => { - expect((await manager.getModelInfo('model-1-generic-gpu'))?.id).toBe('model-1-generic-gpu') - expect((await manager.getModelInfo('model-1-generic-cpu'))?.id).toBe('model-1-generic-cpu') + expect((await manager.getModelInfo('model-1-generic-gpu'))?.id).toBe('model-1-generic-gpu:1') + expect((await manager.getModelInfo('model-1-generic-cpu'))?.id).toBe('model-1-generic-cpu:2') }) it('should return model info by alias on Windows', async () => { vi.spyOn(process, 'platform', 'get').mockReturnValue('win32') - expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-cpu') // cpu is preferred over webgpu - expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu') // npu most preferred - expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu') // cuda most preferred - expect((await manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu') // generic-cpu + expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-cpu:2') // cpu is preferred over webgpu + expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu:2') // npu most preferred + expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu:1') // cuda most preferred + expect((await 
manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu:1') // generic-cpu }) it('should return model info by alias on non-Windows', async () => { vi.spyOn(process, 'platform', 'get').mockReturnValue('linux') - expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-gpu') // webgpu is preferred over cpu - expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu') // npu most preferred - expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu') // cuda most preferred - expect((await manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu') // generic-cpu + expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-gpu:1') // webgpu is preferred over cpu + expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu:2') // npu most preferred + expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu:1') // cuda most preferred + expect((await manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu:1') // generic-cpu }) it('should return null for non-existent model', async () => { @@ -222,7 +234,7 @@ describe('FoundryLocalManager', () => { it('should throw error for non-existent model when throwOnNotFound is true', async () => { await expect(manager.getModelInfo('non_existent', true)).rejects.toThrow( - 'Model with alias or ID non_existent not found in the catalog', + 'Model with alias or ID \'non_existent\' not found in the catalog', ) }) }) @@ -384,6 +396,169 @@ describe('FoundryLocalManager', () => { }) }) + describe('isModelUpgradeable', () => { + it('returns true if model is not cached', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-3-cuda-gpu:1', + alias: 'model-3', + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([]) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-3-cuda-gpu:1', + alias: 'model-3', + runtime: 'CUDAExecutionProvider', + } as any, + ]) + + 
const result = await manager.isModelUpgradeable('model-3') + expect(result).toBe(true) + }) + + it('returns true if model is cached but older version', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-2-npu:2', + alias: 'model-2', + runtime: ExecutionProvider.QNN, + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([ + { id: 'model-2-npu:1' } as any, + ]) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-2-npu:2', + alias: 'model-2', + runtime: ExecutionProvider.QNN, + } as any, + ]) + + const result = await manager.isModelUpgradeable('model-2-npu:1') + expect(result).toBe(true) + }) + + it('returns false if model is cached and latest version', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-4-generic-gpu:1', + alias: 'model-4', + runtime: ExecutionProvider.WEBGPU, + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([ + { id: 'model-4-generic-gpu:1' } as any, + ]) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-4-generic-gpu:1', + alias: 'model-4', + runtime: ExecutionProvider.WEBGPU, + } as any, + ]) + + const result = await manager.isModelUpgradeable('model-4') + expect(result).toBe(false) + }) + + it('returns false if model version is invalid', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-invalid-version', + alias: 'model-invalid', + runtime: ExecutionProvider.CPU, + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([]) + + // simulate getVersion returning -1 + vi.spyOn(manager as any, 'getVersion').mockReturnValue(-1) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-invalid-version', + alias: 'model-invalid', + runtime: ExecutionProvider.CPU, + } as any, + ]) + + const result = await manager.isModelUpgradeable('model-invalid-version') + expect(result).toBe(false) + }) + }) + + describe('upgradeModel', () 
=> { + it('downloads model if not in cache', async () => { + const mockModel = { + id: 'model-3-cuda-gpu:1', + alias: 'model-3', + runtime: ExecutionProvider.CUDA, + uri: 'https://example.com/model', + publisher: 'Microsoft', + provider: 'AzureFoundry', + promptTemplate: {}, + } as any + + vi.spyOn(manager, 'getLatestModelInfo').mockResolvedValue(mockModel) + const downloadSpy = vi.spyOn(manager, 'downloadModel').mockResolvedValue(mockModel) + + const result = await manager.upgradeModel('model-3') + + expect(manager.getLatestModelInfo).toHaveBeenCalledWith('model-3', true) + expect(downloadSpy).toHaveBeenCalledWith('model-3-cuda-gpu:1', undefined, false, undefined) + expect(result).toEqual(mockModel) + }) + + it('downloads latest version if older version is in cache', async () => { + const mockModel = { + id: 'model-2-npu:2', + alias: 'model-2', + runtime: ExecutionProvider.QNN, + uri: 'https://example.com/model2', + publisher: 'Microsoft', + provider: 'AzureFoundry', + promptTemplate: {}, + } as any + + vi.spyOn(manager, 'getLatestModelInfo').mockResolvedValue(mockModel) + const downloadSpy = vi.spyOn(manager, 'downloadModel').mockResolvedValue(mockModel) + + const result = await manager.upgradeModel('model-2-npu:1') + + expect(manager.getLatestModelInfo).toHaveBeenCalledWith('model-2-npu:1', true) + expect(downloadSpy).toHaveBeenCalledWith('model-2-npu:2', undefined, false, undefined) + expect(result).toEqual(mockModel) + }) + + it('does not redownload model if already latest', async () => { + const mockModel = { + id: 'model-4-generic-gpu:1', + alias: 'model-4', + runtime: ExecutionProvider.WEBGPU, + uri: 'https://example.com/model4', + publisher: 'Microsoft', + provider: 'AzureFoundry', + promptTemplate: {}, + } as any + + vi.spyOn(manager, 'getLatestModelInfo').mockResolvedValue(mockModel) + const downloadSpy = vi.spyOn(manager, 'downloadModel').mockResolvedValue(mockModel) + + const result = await manager.upgradeModel('model-4') + + 
expect(manager.getLatestModelInfo).toHaveBeenCalledWith('model-4', true) + expect(downloadSpy).toHaveBeenCalledWith('model-4-generic-gpu:1', undefined, false, undefined) + expect(result).toEqual(mockModel) + }) + + it('throws error if getLatestModelInfo fails', async () => { + vi.spyOn(manager, 'getLatestModelInfo').mockRejectedValue(new Error('Not found')) + + await expect(manager.upgradeModel('nonexistent-model')).rejects.toThrow('Not found') + }) + }) + describe('loadModel', () => { it('should load model with default TTL', async () => { // Setup model info From a21e8ec20c9d8a9732740c56ae5db977997c858d Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 21:35:44 -0700 Subject: [PATCH 10/15] update C# API to support versioning --- sdk/cs/src/FoundryLocalManager.cs | 241 ++++++++++++------ sdk/cs/src/FoundryModelInfo.cs | 6 + sdk/cs/src/Microsoft.AI.Foundry.Local.csproj | 2 +- .../FoundryLocalManagerTest.cs | 191 +++++++++++++- 4 files changed, 363 insertions(+), 77 deletions(-) diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index b71cf9a1..b1a817d0 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -171,10 +171,38 @@ public void RefreshCatalog() public async Task GetModelInfoAsync(string aliasOrModelId, CancellationToken ct = default) { - var dictionary = await GetCatalogDictAsync(ct); + var catalog = await GetCatalogDictAsync(ct); + ModelInfo? modelInfo = null; - dictionary.TryGetValue(aliasOrModelId, out ModelInfo? 
model); - return model; + // Direct match (id with version or alias) + if (catalog.TryGetValue(aliasOrModelId, out var directMatch)) + { + modelInfo = directMatch; + } + else if (!aliasOrModelId.Contains(':')) + { + // If no direct match and aliasOrModelId does not contain a version suffix + var prefix = aliasOrModelId + ":"; + var bestVersion = -1; + + foreach (var kvp in catalog) + { + var key = kvp.Key; + ModelInfo model = kvp.Value; + + if (key.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) + { + var version = GetVersion(key); + if (version > bestVersion) + { + bestVersion = version; + modelInfo = model; + } + } + } + } + + return modelInfo; } public async Task GetCacheLocationAsync(CancellationToken ct = default) @@ -247,64 +275,49 @@ public async Task> ListCachedModelsAsync(CancellationToken ct = public async Task IsModelUpgradeableAsync(string aliasOrModelId, CancellationToken ct = default) { - var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) - ?? throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); - - await StartServiceAsync(ct); - var response = await _serviceClient!.GetAsync($"/openai/upgradeable/{modelInfo.ModelId}", ct); - response.EnsureSuccessStatusCode(); - - var jsonResponse = await response.Content.ReadAsStringAsync(ct); - using var jsonDoc = JsonDocument.Parse(jsonResponse); - return jsonDoc.RootElement.GetProperty("upgradeable").GetBoolean(); - } - - public async Task UpgradeModelAsync( - string aliasOrModelId, - string? token = null, - CancellationToken ct = default) - { - var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) - ?? 
throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); - - var request = new UpgradeRequest + var modelInfo = await GetLatestModelInfoAsync(aliasOrModelId, ct); + if (modelInfo == null) { - Model = new UpgradeRequest.UpgradeBody - { - Name = modelInfo.ModelId, - Uri = modelInfo.Uri, - Publisher = modelInfo.Publisher, - ProviderType = modelInfo.ProviderType == "AzureFoundry" ? "AzureFoundryLocal" : modelInfo.ProviderType, - PromptTemplate = modelInfo.PromptTemplate - }, - Token = token ?? "", - IgnorePipeReport = true - }; + return false; // Model not found in the catalog + } - var response = await _serviceClient!.PostAsJsonAsync("/openai/upgrade", request, ct); - response.EnsureSuccessStatusCode(); - var responseBody = await response.Content.ReadAsStringAsync(ct); + var latestVersion = GetVersion(modelInfo.ModelId); + if (latestVersion == -1) + { + return false; // Invalid version format in model ID + } - // Find the last '{' to get the start of the JSON object - var jsonStart = responseBody.LastIndexOf('{'); - if (jsonStart == -1) + var cachedModels = await ListCachedModelsAsync(ct); + foreach (var cachedModel in cachedModels) { - throw new InvalidOperationException("No JSON object found in response."); + if (cachedModel.ModelId.Equals(modelInfo.ModelId, StringComparison.OrdinalIgnoreCase) && + GetVersion(cachedModel.ModelId) == latestVersion) + { + // Cached model is already at latest version + return false; + } } - var jsonPart = responseBody[jsonStart..]; + // Latest version not in cache - upgrade available + return true; - // Parse the JSON part - using var jsonDoc = JsonDocument.Parse(jsonPart); - var success = jsonDoc.RootElement.GetProperty("success").GetBoolean(); - var errorMessage = jsonDoc.RootElement.GetProperty("errorMessage").GetString(); + } - if (!success) + public async Task UpgradeModelAsync(string aliasOrModelId, string? 
token = null, CancellationToken ct = default) + { + // Get the latest model info; throw if not found + var modelInfo = await GetLatestModelInfoAsync(aliasOrModelId, ct) + ?? throw new ArgumentException($"Model '{aliasOrModelId}' was not found in the catalog."); + + // Attempt to download the model + try { - throw new InvalidOperationException($"Failed to download model: {errorMessage}"); + return await DownloadModelAsync(modelInfo.ModelId, token, false, ct); + } + catch (Exception ex) + { + throw new InvalidOperationException($"Failed to upgrade model '{aliasOrModelId}'.", ex); } - - return modelInfo; } public async Task LoadModelAsync(string aliasOrModelId, TimeSpan? timeout = null, CancellationToken ct = default) @@ -497,39 +510,125 @@ private async Task> FetchModelInfosAsync(IEnumerable ali private async Task> GetCatalogDictAsync(CancellationToken ct = default) { - if (_catalogDictionary == null) + if (_catalogDictionary != null) + { + return _catalogDictionary; + } + + var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); + var models = await ListCatalogModelsAsync(ct); + foreach (var model in models) + { + dict[model.ModelId] = model; + } + + var aliasCandidates = new Dictionary>(StringComparer.OrdinalIgnoreCase); + foreach (var model in models) + { + if (!string.IsNullOrWhiteSpace(model.Alias)) + { + if (!aliasCandidates.TryGetValue(model.Alias, out var list)) + { + list = []; + aliasCandidates[model.Alias] = list; + } + list.Add(model); + } + } + + // For each alias, choose the best candidate based on _priorityMap and version + foreach (var kvp in aliasCandidates) { - var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); - var models = await ListCatalogModelsAsync(ct); - foreach (var model in models) + var alias = kvp.Key; + List candidates = kvp.Value; + + ModelInfo bestCandidate = candidates.Aggregate((best, current) => { - dict[model.ModelId] = model; + // Get priorities or max int if not found + var bestPriority = 
_priorityMap.TryGetValue(best.Runtime.ExecutionProvider, out var bp) ? bp : int.MaxValue; + var currentPriority = _priorityMap.TryGetValue(current.Runtime.ExecutionProvider, out var cp) ? cp : int.MaxValue; - if (!string.IsNullOrWhiteSpace(model.Alias)) + if (currentPriority < bestPriority) { - if (!dict.TryGetValue(model.Alias, out var existing)) - { - dict[model.Alias] = model; - } - else - { - var currentPriority = _priorityMap.TryGetValue(model.Runtime.ExecutionProvider, out var cp) ? cp : int.MaxValue; - var existingPriority = _priorityMap.TryGetValue(existing.Runtime.ExecutionProvider, out var ep) ? ep : int.MaxValue; + return current; + } - if (currentPriority < existingPriority) - { - dict[model.Alias] = model; - } + if (currentPriority == bestPriority) + { + var bestVersion = GetVersion(best.ModelId); + var currentVersion = GetVersion(current.ModelId); + if (currentVersion > bestVersion) + { + return current; } } - } - _catalogDictionary = dict; + return best; + }); + + dict[alias] = bestCandidate; } + _catalogDictionary = dict; return _catalogDictionary; } + public async Task GetLatestModelInfoAsync(string aliasOrModelId, CancellationToken ct = default) + { + if (string.IsNullOrEmpty(aliasOrModelId)) + { + return null; + } + + var catalog = await GetCatalogDictAsync(ct); + + // If alias or id without version + if (!aliasOrModelId.Contains(':')) + { + // If exact match in catalog, return it directly + if (catalog.TryGetValue(aliasOrModelId, out var model)) + { + return model; + } + + // Otherwise, GetModelInfoAsync will get the latest version + return await GetModelInfoAsync(aliasOrModelId, ct); + } + else + { + // If ID with version, strip version and use GetModelInfoAsync to get the latest version + var idWithoutVersion = aliasOrModelId.Split(':')[0]; + return await GetModelInfoAsync(idWithoutVersion, ct); + } + } + + /// + /// Extracts the numeric version from a model ID string (e.g. "model-x:3" → 3). + /// + /// The model ID string. 
+ /// The numeric version, or -1 if not found. + public static int GetVersion(string modelId) + { + if (string.IsNullOrEmpty(modelId)) + { + return -1; + } + + var parts = modelId.Split(':'); + if (parts.Length == 0) + { + return -1; + } + + var versionPart = parts[^1]; // last element + if (int.TryParse(versionPart, out var version)) + { + return version; + } + + return -1; + } + private static async Task EnsureServiceRunning(CancellationToken ct = default) { var startInfo = new ProcessStartInfo diff --git a/sdk/cs/src/FoundryModelInfo.cs b/sdk/cs/src/FoundryModelInfo.cs index 15b537ba..4cd752e5 100644 --- a/sdk/cs/src/FoundryModelInfo.cs +++ b/sdk/cs/src/FoundryModelInfo.cs @@ -99,6 +99,12 @@ public record ModelInfo [JsonPropertyName("parentModelUri")] public string ParentModelUri { get; init; } = default!; + + [JsonPropertyName("maxOutputTokens")] + public long MaxOutputTokens { get; init; } + + [JsonPropertyName("minFLVersion")] + public string MinFLVersion { get; init; } = default!; } internal sealed class DownloadRequest diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index f9478889..75753c59 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -14,7 +14,7 @@ enable enable True - 0.1.0 + 0.2.0 diff --git a/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs b/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs index 6fc380e3..84658302 100644 --- a/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs +++ b/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs @@ -131,7 +131,7 @@ public async Task GetModelInfoAsync_ReturnsModel_WhenModelExists() public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() { // GIVEN - var phi4MiniGenericCpuModelId = "Phi-4-mini-instruct-generic-cpu"; + var phi4MiniGenericCpuModelId = "Phi-4-mini-instruct-generic-cpu:1"; var phi4MiniAlias = "phi-4-mini"; var phi4MiniGenericCpuModel 
= new ModelInfo { @@ -146,7 +146,7 @@ public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() } }; - var phi4MiniWebGpuModelId = "Phi-4-mini-instruct-webgpu"; + var phi4MiniWebGpuModelId = "Phi-4-mini-instruct-webgpu:1"; var phi4MiniWebGpuModel = new ModelInfo { ModelId = phi4MiniWebGpuModelId, @@ -160,7 +160,7 @@ public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() } }; - var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu"; + var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu:1"; var phi4MiniCudaModel = new ModelInfo { ModelId = phi4MiniCudaModelId, @@ -202,7 +202,7 @@ public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() public async Task GetModelInfoAsync_QnnHigherPriorityThanCuda() { // GIVEN - var phi4MiniQnnModelId = "Phi-4-mini-instruct-qnn"; + var phi4MiniQnnModelId = "Phi-4-mini-instruct-qnn:1"; var phi4MiniAlias = "phi-4-mini"; var phi4MiniQnnModel = new ModelInfo { @@ -217,7 +217,7 @@ public async Task GetModelInfoAsync_QnnHigherPriorityThanCuda() } }; - var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu"; + var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu:1"; var phi4MiniCudaModel = new ModelInfo { ModelId = phi4MiniCudaModelId, @@ -688,6 +688,187 @@ public async Task DownloadModelWithProgressAsync_DownloadErrorProvidesErrorProgr Assert.Equal("Download error occurred.", p.ErrorMessage); } + [Fact] + public async Task UpgradeModelAsync_Success_ReturnsDownloadedModel() + { + var alias = "model-1"; + var modelId = "model-1:2"; + var token = "token"; + + // Mock /foundry/list for catalog models + var catalogModels = new List + { + new() { + ModelId = modelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + 
.Respond("application/json", catalogJson); + + var downloadResponseJson = "{\"success\": true, \"errorMessage\": null}"; + _mockHttp.When(HttpMethod.Post, "/openai/download") + .Respond("application/json", downloadResponseJson); + + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", "[]"); + + // Act + var result = await _manager.UpgradeModelAsync(alias, token); + + // Assert + Assert.NotNull(result); + Assert.Equal(modelId, result.ModelId); + } + + [Fact] + public async Task UpgradeModelAsync_ModelNotFound_ThrowsArgumentException() + { + var alias = "missing-model"; + + // Mock /foundry/list to return an empty list (no models) + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", "[]"); + + // Mock /openai/models for cached models (empty) + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", "[]"); + + // We don't expect download to be called, but mock anyway if needed + _mockHttp.When(HttpMethod.Post, "/openai/download") + .Respond("application/json", "{\"success\": true, \"errorMessage\": null}"); + + // Act & Assert + var ex = await Assert.ThrowsAsync(() => _manager.UpgradeModelAsync(alias)); + Assert.Contains("not found", ex.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + + public async Task UpgradeModelAsync_DownloadReturnsNull_ThrowsInvalidOperationException() + { + var alias = "model-1"; + var modelId = "model-1:2"; + + // Mock /foundry/list to return the model info (so it's found) + var catalogModels = new List + { + new() + { + ModelId = modelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", catalogJson); + + // Mock /openai/models (empty cached) + 
_mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", "[]"); + + // Mock /openai/download to simulate failure (success: false) + var failedDownloadResponseJson = "{\"success\": false, \"errorMessage\": \"Simulated download failure.\"}"; + _mockHttp.When(HttpMethod.Post, "/openai/download") + .Respond("application/json", failedDownloadResponseJson); + + // Act & Assert + var ex = await Assert.ThrowsAsync(() => _manager.UpgradeModelAsync(alias)); + Assert.Contains("failed to upgrade", ex.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task IsModelUpgradeableAsync_ReturnsTrue_WhenNewerVersionAvailable() + { + var alias = "model-1"; + var latestModelId = "model-1:2"; + + // Catalog contains the latest model version + var catalogModels = new List + { + new() + { + ModelId = latestModelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", catalogJson); + + // Cached models contain an older version + var cachedModels = new[] { "model-1:1" }; + var cachedModelsJson = JsonSerializer.Serialize(cachedModels); + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", cachedModelsJson); + + // Act + var result = await _manager.IsModelUpgradeableAsync(alias); + + // Assert + Assert.True(result); + } + + [Fact] + public async Task IsModelUpgradeableAsync_ReturnsFalse_WhenAlreadyLatestVersionCached() + { + var alias = "model-1"; + var latestModelId = "model-1:2"; + + // Catalog with latest version + var catalogModels = new List + { + new() + { + ModelId = latestModelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = 
ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", catalogJson); + + // Cached model is already at the latest version + var cachedModels = new[] { latestModelId }; + var cachedModelsJson = JsonSerializer.Serialize(cachedModels); + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", cachedModelsJson); + + // Act + var result = await _manager.IsModelUpgradeableAsync(alias); + + // Assert + Assert.False(result); + } + + [Fact] + public async Task IsModelUpgradeableAsync_ReturnsFalse_WhenModelNotFoundInCatalog() + { + var alias = "missing-model"; + + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", "[]"); + + // Act + var result = await _manager.IsModelUpgradeableAsync(alias); + + // Assert + Assert.False(result); + } + public void Dispose() { _client.Dispose(); From 6477a9f39110b5f82bf3ce34f8c93c01e163d29d Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 23:10:19 -0700 Subject: [PATCH 11/15] update Rust API to support versioning --- sdk/rust/Cargo.toml | 2 +- sdk/rust/src/api.rs | 240 +++++++++++++++++++-------------- sdk/rust/src/client.rs | 2 +- sdk/rust/src/lib.rs | 10 +- sdk/rust/src/models.rs | 4 + sdk/rust/tests/mock_service.rs | 31 +++-- sdk/rust/tests/test_api.rs | 96 ++++++++++--- 7 files changed, 253 insertions(+), 132 deletions(-) diff --git a/sdk/rust/Cargo.toml b/sdk/rust/Cargo.toml index 3fb31e01..0fffaa0b 100644 --- a/sdk/rust/Cargo.toml +++ b/sdk/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "foundry-local" -version = "0.1.0" +version = "0.2.0" edition = "2021" description = "SDK for Microsoft Foundry Local service" license = "MIT" diff --git a/sdk/rust/src/api.rs b/sdk/rust/src/api.rs index 0adabd29..446c4186 100644 --- a/sdk/rust/src/api.rs +++ b/sdk/rust/src/api.rs @@ -83,6 +83,18 @@ impl FoundryLocalManagerBuilder { } } +/// Extract 
numeric version from ID (e.g. model-x:3 → 3) +/// +/// # Returns +/// +/// Numeric version as i32, or -1 if not found. +fn get_version(model_id: &str) -> i32 { + match model_id.rsplit(':').next() { + Some(version_str) => version_str.parse::().unwrap_or(-1), + None => -1, + } +} + impl FoundryLocalManager { /// Create a new builder for FoundryLocalManager. pub fn builder() -> FoundryLocalManagerBuilder { @@ -194,28 +206,23 @@ impl FoundryLocalManager { /// /// Dictionary of catalog models. async fn get_catalog_dict(&mut self) -> Result<&HashMap> { - if self.catalog_dict.is_some() { - return Ok(self.catalog_dict.as_ref().unwrap()); + if let Some(ref dict) = self.catalog_dict { + return Ok(dict); } let catalog_models = self.list_catalog_models().await?; let mut catalog_dict = HashMap::new(); - let mut alias_candidates: HashMap> = HashMap::new(); + let mut alias_candidates: HashMap> = HashMap::new(); - // Create dictionary of models by ID - for model in catalog_models { + for model in catalog_models.iter() { catalog_dict.insert(model.id.clone(), model.clone()); - } - - // Group models by alias - for model in catalog_models { alias_candidates .entry(model.alias.clone()) .or_default() - .push(model); + .push(model.clone()); } - // Define the preferred order of execution providers + // Set up priority order let mut preferred_order = vec![ ExecutionProvider::QNN, ExecutionProvider::CUDA, @@ -224,26 +231,33 @@ impl FoundryLocalManager { ]; if cfg!(not(target_os = "windows")) { - // Adjust order for non-Windows platforms - preferred_order.retain(|p| !matches!(p, ExecutionProvider::CPU)); + // Move CPU to the end for non-Windows + preferred_order.retain(|p| *p != ExecutionProvider::CPU); preferred_order.push(ExecutionProvider::CPU); } - let priority_map: HashMap<_, _> = preferred_order + let priority_map: HashMap = preferred_order .into_iter() .enumerate() .map(|(i, provider)| (provider, i)) .collect(); - // Choose the preferred model for each alias + // Choose the best 
candidate per alias for (alias, candidates) in alias_candidates { - if let Some(preferred) = candidates.into_iter().min_by_key(|model| { - priority_map - .get(&model.runtime) - .copied() - .unwrap_or(usize::MAX) + if let Some(best) = candidates.into_iter().max_by(|a, b| { + // Compare priority (lower index = higher priority) + let pa = priority_map.get(&a.runtime).copied().unwrap_or(usize::MAX); + let pb = priority_map.get(&b.runtime).copied().unwrap_or(usize::MAX); + match pa.cmp(&pb) { + std::cmp::Ordering::Less => std::cmp::Ordering::Greater, + std::cmp::Ordering::Greater => std::cmp::Ordering::Less, + std::cmp::Ordering::Equal => { + // Same priority → compare version + get_version(&a.id).cmp(&get_version(&b.id)) + } + } }) { - catalog_dict.insert(alias, preferred.clone()); + catalog_dict.insert(alias, best); } } @@ -274,16 +288,80 @@ impl FoundryLocalManager { ) -> Result { let catalog_dict = self.get_catalog_dict().await?; - match catalog_dict.get(alias_or_model_id) { - Some(model) => Ok(model.clone()), - None if raise_on_not_found => Err(anyhow!( - "Model {} not found in the catalog", - alias_or_model_id - )), - None => Err(anyhow!( - "Model {} not found in the catalog", - alias_or_model_id - )), + // Try to find exact match (alias or id) + if let Some(model_info) = catalog_dict.get(alias_or_model_id) { + return Ok(model_info.clone()); + } + + // If alias_or_model_id does NOT contain version suffix (":") + if !alias_or_model_id.contains(':') { + // Search for model IDs starting with alias_or_model_id + ":" + let prefix = format!("{}:", alias_or_model_id); + let mut best_version = -1; + let mut best_model: Option = None; + + for (key, info) in catalog_dict.iter() { + if key.starts_with(&prefix) { + let version = get_version(key); + if version > best_version { + best_version = version; + best_model = Some(info.clone()); + } + } + } + + if let Some(model) = best_model { + return Ok(model); + } + } + + if raise_on_not_found { + Err(anyhow!("Model {} not found in 
the catalog", alias_or_model_id)) + } else { + Err(anyhow!("Model {} not found in the catalog", alias_or_model_id)) + } + } + + /// Get the latest model information by alias or model ID. + /// The difference from get_model_info is that this method will return the latest version of the model + /// even when you pass it a model id that contains a version suffix. + /// + /// # Arguments + /// + /// * `alias_or_model_id` - Alias or Model ID. If it is an alias, the most preferred model will be returned. + /// * `raise_on_not_found` - If true, raise an error if the model is not found. Default is false. + /// + /// # Returns + /// + /// Latest model information, or None if not found and raise_on_not_found is false. + pub async fn get_latest_model_info( + &mut self, + alias_or_model_id: &str, + raise_on_not_found: bool, + ) -> Result { + if alias_or_model_id.is_empty() { + if raise_on_not_found { + return Err(anyhow!("The provided model alias or ID was empty.")); + } else { + return Err(anyhow!("Model alias or ID was empty and raise_on_not_found is false.")); + } + } + + let catalog = self.get_catalog_dict().await?; + + if !alias_or_model_id.contains(':') { + // Alias or ID without version suffix + if let Some(model) = catalog.get(alias_or_model_id) { + // Found directly in catalog (alias or id) + Ok(model.clone()) + } else { + // Fallback: try get_model_info which can look for latest version of ID without version + self.get_model_info(alias_or_model_id, raise_on_not_found).await + } + } else { + // ID with version suffix: strip version and get latest by ID without version + let id_without_version = alias_or_model_id.split(':').next().unwrap_or(""); + self.get_model_info(id_without_version, raise_on_not_found).await } } @@ -397,99 +475,65 @@ impl FoundryLocalManager { Ok(model_info) } - /// Checks if a newer version of a model is available. + /// Check if a newer version of a model is available. 
/// /// # Arguments /// - /// * `alias_or_model_id` - The alias or model ID to check for upgrades. + /// * `alias_or_model_id` - Alias or Model ID. /// /// # Returns /// - /// True if a newer version is available, otherwise false. + /// bool: True if a newer version is available, False otherwise. pub async fn is_model_upgradeable( &mut self, - alias_or_model_id: &str + alias_or_model_id: &str, ) -> Result { - let model_info = self.get_model_info(alias_or_model_id, true).await?; - let url = format!("/openai/upgradeable/{}", model_info.id); + info!("Checking if model '{}' is upgradeable", alias_or_model_id); - let client = self.client()?; - let response: Option = client.get(&url, None).await?; + // Get the latest model info (throws if not found) + let latest_model_info = self + .get_latest_model_info(alias_or_model_id, true) + .await?; + + let latest_version = get_version(&latest_model_info.id); + if latest_version == -1 { + return Ok(false); // Invalid version format + } - let data = response.ok_or_else(|| anyhow!("Failed to check model upgrade availability"))?; - data["upgradeable"] - .as_bool() - .ok_or_else(|| anyhow!("Invalid upgrade response format")) + let cached_models = self.list_cached_models().await?; + for cached_model in cached_models { + if cached_model.id == latest_model_info.id + && get_version(&cached_model.id) == latest_version + { + return Ok(false); // Already cached latest version + } + } + + Ok(true) // Latest version not in cache } - /// Upgrades a model to its latest version. + /// Downloads the latest version of a model to the local cache, if it is not already cached. /// /// # Arguments /// /// * `alias_or_model_id` - The alias or model ID to upgrade. - /// * `token` - Optional token for authentication. + /// * `token` - Optional authentication token. /// - /// # Returns + /// # Errors /// - /// The upgraded model information. + /// Downloaded model information. 
pub async fn upgrade_model( &mut self, alias_or_model_id: &str, token: Option<&str>, ) -> Result { - let model_info = self.get_model_info(alias_or_model_id, true).await?; - info!( - "Upgrading model: {} ({})", - model_info.alias, model_info.id - ); - - // Create the upgrade body similar to the JS implementation - let mut body = serde_json::json!({ - "model": { - "Name": model_info.id, - "Uri": model_info.uri, - "Publisher": model_info.publisher, - "ProviderType": if model_info.provider == "AzureFoundry" { - "AzureFoundryLocal" - } else { - model_info.provider - }, - "PromptTemplate": model_info.prompt_template, - }, - "IgnorePipeReport": true - }); - - // Add token if provided - if let Some(t) = token { - body["token"] = Value::String(t.to_string()); - } - - let client = self.client()?; - let response: Value = client - .post_with_progress("/openai/upgrade", Some(body)) + // Get the latest model info (this also validates existence) + let model_info = self + .get_latest_model_info(alias_or_model_id, true) .await?; - // Check if the upgrade was successful - if !response["success"].as_bool().unwrap_or(false) { - let error_msg = response["error"] - .as_str() - .unwrap_or("Unknown error"); - - return Err(anyhow!( - "Failed to upgrade model with alias '{}' and ID '{}': {}", - model_info.alias, - model_info.id, - error_msg - )); - } - - // Refresh the model cache to get the latest information - self.refresh_catalog(); - - // Get the updated model information - let updated_model_info = self.get_model_info(&model_info.id, true).await?; - - Ok(updated_model_info) + // Download the model and discard the result + self.download_model(&model_info.id, token, false).await } /// Load a model. diff --git a/sdk/rust/src/client.rs b/sdk/rust/src/client.rs index fb964aaf..8779d4f4 100644 --- a/sdk/rust/src/client.rs +++ b/sdk/rust/src/client.rs @@ -46,7 +46,7 @@ impl HttpClient { /// A new HttpClient instance. 
pub fn new(host: &str, timeout_secs: Option) -> Self { let timeout = timeout_secs.map(Duration::from_secs); - let mut client_builder = Client::builder().user_agent("foundry-local-rust-sdk/0.1.0"); + let mut client_builder = Client::builder().user_agent("foundry-local-rust-sdk/0.2.0"); if let Some(timeout) = timeout { client_builder = client_builder.timeout(timeout); diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs index 6cec3ea2..a563c980 100644 --- a/sdk/rust/src/lib.rs +++ b/sdk/rust/src/lib.rs @@ -12,7 +12,7 @@ //! //! ## Example //! -//! ```rust +//! ```rust, ignore //! use foundry_local::FoundryLocalManager; //! use anyhow::Result; //! @@ -37,8 +37,12 @@ //! .await?; //! //! let result = response.json::().await?; -//! println!("{}", result["choices"][0]["message"]["content"]); -//! +//! if let Some(content) = result["choices"][0]["message"]["content"].as_str() { +//! println!("{}", content); +//! } else { +//! println!("No content found in response."); +//! } +//! //! Ok(()) //! } //! ``` diff --git a/sdk/rust/src/models.rs b/sdk/rust/src/models.rs index a6ed2194..b6aa6fe6 100644 --- a/sdk/rust/src/models.rs +++ b/sdk/rust/src/models.rs @@ -98,6 +98,10 @@ pub struct FoundryListResponseModel { pub license_description: String, #[serde(rename = "parentModelUri")] pub parent_model_uri: String, + #[serde(rename = "maxOutputTokens")] + pub max_output_tokens: i32, + #[serde(rename = "minFLVersion")] + pub min_fl_version: String, } /// Model information. 
diff --git a/sdk/rust/tests/mock_service.rs b/sdk/rust/tests/mock_service.rs index d36fdb7a..e7233b1d 100644 --- a/sdk/rust/tests/mock_service.rs +++ b/sdk/rust/tests/mock_service.rs @@ -26,11 +26,11 @@ impl Default for MockState { Self { catalog_models: vec![ FoundryModelInfo { - id: "mock-model-1".to_string(), - alias: "mock-small".to_string(), + id: "Phi-4-mini-instruct-generic-cpu:1".to_string(), + alias: "phi-4-mini".to_string(), runtime: ExecutionProvider::CPU, file_size_mb: 100, - uri: "https://mock-uri/model1".to_string(), + uri: "azureml://registries/azureml/models/Phi-4-mini-instruct-generic-cpu/versions/1".to_string(), version: "1.0".to_string(), prompt_template: serde_json::json!({}), provider: "MockProvider".to_string(), @@ -39,11 +39,11 @@ impl Default for MockState { task: "text-generation".to_string(), }, FoundryModelInfo { - id: "mock-model-2".to_string(), - alias: "mock-medium".to_string(), + id: "qwen2.5-0.5b-instruct-cuda-gpu:1".to_string(), + alias: "qwen2.5-0.5b".to_string(), runtime: ExecutionProvider::CUDA, file_size_mb: 500, - uri: "https://mock-uri/model2".to_string(), + uri: "azureml://registries/azureml/models/qwen2.5-0.5b-instruct-cuda-gpu/versions/1".to_string(), version: "1.0".to_string(), prompt_template: serde_json::json!({}), provider: "MockProvider".to_string(), @@ -51,8 +51,21 @@ impl Default for MockState { license: "MIT".to_string(), task: "text-generation".to_string(), }, + FoundryModelInfo { + id: "qwen2.5-0.5b-instruct-cuda-gpu:2".to_string(), + alias: "qwen2.5-0.5b".to_string(), + runtime: ExecutionProvider::CUDA, + file_size_mb: 600, + uri: "azureml://registries/azureml/models/qwen2.5-0.5b-instruct-cuda-gpu/versions/2".to_string(), + version: "2.0".to_string(), + prompt_template: serde_json::json!({}), + provider: "MockProvider".to_string(), + publisher: "MockPublisher".to_string(), + license: "MIT".to_string(), + task: "text-generation".to_string(), + }, ], - cached_models: vec!["mock-model-1".to_string()], + 
cached_models: vec!["qwen2.5-0.5b-instruct-cuda-gpu:1".to_string()], loaded_models: vec![], cache_location: "/tmp/mock-cache".to_string(), } @@ -86,7 +99,9 @@ async fn list_catalog(State(state): State) -> impl IntoResponse { "supportsToolCalling": false, "license": model.license, "licenseDescription": "", - "parentModelUri": "" + "parentModelUri": "", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0" }) }) .collect::>(); diff --git a/sdk/rust/tests/test_api.rs b/sdk/rust/tests/test_api.rs index db54d81c..bfd7137a 100644 --- a/sdk/rust/tests/test_api.rs +++ b/sdk/rust/tests/test_api.rs @@ -29,13 +29,16 @@ async fn test_list_catalog_models() { let catalog_models = manager.list_catalog_models().await.unwrap(); // Verify the result - assert_eq!(catalog_models.len(), 2); - assert_eq!(catalog_models[0].id, "mock-model-1"); - assert_eq!(catalog_models[0].alias, "mock-small"); + assert_eq!(catalog_models.len(), 3); + assert_eq!(catalog_models[0].id, "Phi-4-mini-instruct-generic-cpu:1"); + assert_eq!(catalog_models[0].alias, "phi-4-mini"); assert_eq!(catalog_models[0].runtime, ExecutionProvider::CPU); - assert_eq!(catalog_models[1].id, "mock-model-2"); - assert_eq!(catalog_models[1].alias, "mock-medium"); + assert_eq!(catalog_models[1].id, "qwen2.5-0.5b-instruct-cuda-gpu:1"); + assert_eq!(catalog_models[1].alias, "qwen2.5-0.5b"); assert_eq!(catalog_models[1].runtime, ExecutionProvider::CUDA); + assert_eq!(catalog_models[2].id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); + assert_eq!(catalog_models[2].alias, "qwen2.5-0.5b"); + assert_eq!(catalog_models[2].runtime, ExecutionProvider::CUDA); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -50,14 +53,14 @@ async fn test_get_model_info() { let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; // Test getting model info by ID - let model_info = manager.get_model_info("mock-model-1", false).await.unwrap(); - assert_eq!(model_info.id, "mock-model-1"); - assert_eq!(model_info.alias, "mock-small"); + let 
model_info = manager.get_model_info("Phi-4-mini-instruct-generic-cpu:1", false).await.unwrap(); + assert_eq!(model_info.id, "Phi-4-mini-instruct-generic-cpu:1"); + assert_eq!(model_info.alias, "phi-4-mini"); // Test getting model info by alias - let model_info = manager.get_model_info("mock-small", false).await.unwrap(); - assert_eq!(model_info.id, "mock-model-1"); - assert_eq!(model_info.alias, "mock-small"); + let model_info = manager.get_model_info("qwen2.5-0.5b", false).await.unwrap(); + assert_eq!(model_info.id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); + assert_eq!(model_info.alias, "qwen2.5-0.5b"); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -90,7 +93,7 @@ async fn test_list_cached_models() { // Test listing cached models let cached_models = manager.list_cached_models().await.unwrap(); assert_eq!(cached_models.len(), 1); - assert_eq!(cached_models[0].id, "mock-model-1"); + assert_eq!(cached_models[0].id, "qwen2.5-0.5b-instruct-cuda-gpu:1"); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -106,21 +109,72 @@ async fn test_download_model() { // Test downloading a model let model_info = manager - .download_model("mock-model-2", None, false) + .download_model("qwen2.5-0.5b", None, false) .await .unwrap(); - assert_eq!(model_info.id, "mock-model-2"); + assert_eq!(model_info.id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); - // Verify the model is now cached + // Verify latest version of the model is now also cached let cached_models = manager.list_cached_models().await.unwrap(); assert_eq!(cached_models.len(), 2); - assert!(cached_models.iter().any(|m| m.id == "mock-model-1")); - assert!(cached_models.iter().any(|m| m.id == "mock-model-2")); + assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:1")); + assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:2")); // Shutdown the mock server shutdown_tx.send(()).unwrap(); } +#[tokio::test] +async fn test_is_model_upgradeable() { + // Start the mock 
server + let (server_uri, shutdown_tx) = start_mock_server().await; + + // Create a manager with the mock server URI + let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; + + // When no version is in the cache + let is_upgradeable = manager.is_model_upgradeable("phi-4-mini").await.unwrap(); + assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + + // When the latest version is not in the cache + let is_upgradeable = manager.is_model_upgradeable("qwen2.5-0.5b").await.unwrap(); + assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + + // Shutdown the mock server + shutdown_tx.send(()).unwrap(); +} + +#[tokio::test] +async fn test_upgrade_model_success() { + let (server_uri, shutdown_tx) = start_mock_server().await; + let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; + + // This should trigger download of latest model "qwen2.5-0.5b-instruct-cuda-gpu:2" + let model_info = manager.upgrade_model("qwen2.5-0.5b", None).await.unwrap(); + + // Assert returned model info is correct (example) + assert_eq!(model_info.id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); + + shutdown_tx.send(()).unwrap(); +} + +#[tokio::test] +async fn test_upgrade_model_not_found() { + let (server_uri, shutdown_tx) = start_mock_server().await; + let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; + + // Call upgrade_model without unwrap to get Result + let result = manager.upgrade_model("non-existent-model", None).await; + + // Assert it returned an error + assert!(result.is_err()); + + let err_msg = format!("{}", result.unwrap_err()); + assert!(err_msg.contains("not found")); + + shutdown_tx.send(()).unwrap(); +} + #[tokio::test] async fn test_load_and_unload_model() { // Start the mock server @@ -130,16 +184,16 @@ async fn test_load_and_unload_model() { let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; // Test loading a model - let model_info = 
manager.load_model("mock-model-1", Some(300)).await.unwrap(); - assert_eq!(model_info.id, "mock-model-1"); + let model_info = manager.load_model("phi-4-mini", Some(300)).await.unwrap(); + assert_eq!(model_info.id, "Phi-4-mini-instruct-generic-cpu:1"); // Verify the model is loaded let loaded_models = manager.list_loaded_models().await.unwrap(); assert_eq!(loaded_models.len(), 1); - assert_eq!(loaded_models[0].id, "mock-model-1"); + assert_eq!(loaded_models[0].id, "Phi-4-mini-instruct-generic-cpu:1"); // Test unloading the model - manager.unload_model("mock-model-1", false).await.unwrap(); + manager.unload_model("phi-4-mini", false).await.unwrap(); // Verify the model is unloaded let loaded_models = manager.list_loaded_models().await.unwrap(); From 603d8f002ee121b3b471802b6e85dda183159eec Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 23:16:31 -0700 Subject: [PATCH 12/15] cargo format --- sdk/rust/src/api.rs | 33 ++++++++++++++++++--------------- sdk/rust/tests/test_api.rs | 23 ++++++++++++++++++----- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/sdk/rust/src/api.rs b/sdk/rust/src/api.rs index 446c4186..a2762384 100644 --- a/sdk/rust/src/api.rs +++ b/sdk/rust/src/api.rs @@ -316,9 +316,15 @@ impl FoundryLocalManager { } if raise_on_not_found { - Err(anyhow!("Model {} not found in the catalog", alias_or_model_id)) + Err(anyhow!( + "Model {} not found in the catalog", + alias_or_model_id + )) } else { - Err(anyhow!("Model {} not found in the catalog", alias_or_model_id)) + Err(anyhow!( + "Model {} not found in the catalog", + alias_or_model_id + )) } } @@ -343,7 +349,9 @@ impl FoundryLocalManager { if raise_on_not_found { return Err(anyhow!("The provided model alias or ID was empty.")); } else { - return Err(anyhow!("Model alias or ID was empty and raise_on_not_found is false.")); + return Err(anyhow!( + "Model alias or ID was empty and raise_on_not_found is false." 
+ )); } } @@ -356,12 +364,14 @@ impl FoundryLocalManager { Ok(model.clone()) } else { // Fallback: try get_model_info which can look for latest version of ID without version - self.get_model_info(alias_or_model_id, raise_on_not_found).await + self.get_model_info(alias_or_model_id, raise_on_not_found) + .await } } else { // ID with version suffix: strip version and get latest by ID without version let id_without_version = alias_or_model_id.split(':').next().unwrap_or(""); - self.get_model_info(id_without_version, raise_on_not_found).await + self.get_model_info(id_without_version, raise_on_not_found) + .await } } @@ -484,16 +494,11 @@ impl FoundryLocalManager { /// # Returns /// /// bool: True if a newer version is available, False otherwise. - pub async fn is_model_upgradeable( - &mut self, - alias_or_model_id: &str, - ) -> Result { + pub async fn is_model_upgradeable(&mut self, alias_or_model_id: &str) -> Result { info!("Checking if model '{}' is upgradeable", alias_or_model_id); // Get the latest model info (throws if not found) - let latest_model_info = self - .get_latest_model_info(alias_or_model_id, true) - .await?; + let latest_model_info = self.get_latest_model_info(alias_or_model_id, true).await?; let latest_version = get_version(&latest_model_info.id); if latest_version == -1 { @@ -528,9 +533,7 @@ impl FoundryLocalManager { token: Option<&str>, ) -> Result { // Get the latest model info (this also validates existence) - let model_info = self - .get_latest_model_info(alias_or_model_id, true) - .await?; + let model_info = self.get_latest_model_info(alias_or_model_id, true).await?; // Download the model and discard the result self.download_model(&model_info.id, token, false).await diff --git a/sdk/rust/tests/test_api.rs b/sdk/rust/tests/test_api.rs index bfd7137a..843d14df 100644 --- a/sdk/rust/tests/test_api.rs +++ b/sdk/rust/tests/test_api.rs @@ -53,7 +53,10 @@ async fn test_get_model_info() { let mut manager = 
FoundryLocalManager::with_test_uri(&server_uri).await; // Test getting model info by ID - let model_info = manager.get_model_info("Phi-4-mini-instruct-generic-cpu:1", false).await.unwrap(); + let model_info = manager + .get_model_info("Phi-4-mini-instruct-generic-cpu:1", false) + .await + .unwrap(); assert_eq!(model_info.id, "Phi-4-mini-instruct-generic-cpu:1"); assert_eq!(model_info.alias, "phi-4-mini"); @@ -117,8 +120,12 @@ async fn test_download_model() { // Verify latest version of the model is now also cached let cached_models = manager.list_cached_models().await.unwrap(); assert_eq!(cached_models.len(), 2); - assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:1")); - assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:2")); + assert!(cached_models + .iter() + .any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:1")); + assert!(cached_models + .iter() + .any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:2")); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -134,11 +141,17 @@ async fn test_is_model_upgradeable() { // When no version is in the cache let is_upgradeable = manager.is_model_upgradeable("phi-4-mini").await.unwrap(); - assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + assert!( + is_upgradeable, + "Expected upgradeable because latest version is not cached" + ); // When the latest version is not in the cache let is_upgradeable = manager.is_model_upgradeable("qwen2.5-0.5b").await.unwrap(); - assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + assert!( + is_upgradeable, + "Expected upgradeable because latest version is not cached" + ); // Shutdown the mock server shutdown_tx.send(()).unwrap(); From 9b20fb40e07de9a5e5d0444f2d8d688c0ad0d8d6 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Mon, 28 Jul 2025 16:56:46 -0700 Subject: [PATCH 13/15] update hello-foundry-local sample, make new Response properties optional --- 
samples/python/hello-foundry-local/src/app.py | 2 +- sdk/python/foundry_local/models.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py index dd43a2a7..fb78b724 100644 --- a/samples/python/hello-foundry-local/src/app.py +++ b/samples/python/hello-foundry-local/src/app.py @@ -6,7 +6,7 @@ # By using an alias, the most suitable model will be downloaded # to your end-user's device. -alias = "phi-3.5-mini" +alias = "qwen2.5-coder-0.5b-instruct-generic-cpu:3" # Create a FoundryLocalManager instance. This will start the Foundry # Local service if it is not already running and load the specified model. diff --git a/sdk/python/foundry_local/models.py b/sdk/python/foundry_local/models.py index c4018c07..eb3847c6 100644 --- a/sdk/python/foundry_local/models.py +++ b/sdk/python/foundry_local/models.py @@ -7,6 +7,7 @@ import sys from pydantic import BaseModel, Field +from typing import Optional if sys.version_info >= (3, 11): from enum import StrEnum @@ -78,8 +79,8 @@ class FoundryListResponseModel(BaseModel): license: str = Field(..., description="License of the model") licenseDescription: str = Field(..., description="License description of the model") parentModelUri: str = Field(..., description="Parent model URI of the model") - maxOutputTokens: int = Field(..., description="Maximum output tokens for the model") - minFLVersion: str = Field(..., description="Minimum Foundry Local version required for the model") + maxOutputTokens: Optional[int] = Field(..., description="Maximum output tokens for the model") + minFLVersion: Optional[str] = Field(..., description="Minimum Foundry Local version required for the model") class FoundryModelInfo(BaseModel): From 35cadcb0ec11757e78e58f019d9c647d62e4d2db Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Mon, 28 Jul 2025 17:08:15 -0700 Subject: [PATCH 14/15] Update model used in js/hello-foundry-local --- 
samples/js/hello-foundry-local/src/app.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/js/hello-foundry-local/src/app.js b/samples/js/hello-foundry-local/src/app.js index 6dc41a31..2fdae5d5 100644 --- a/samples/js/hello-foundry-local/src/app.js +++ b/samples/js/hello-foundry-local/src/app.js @@ -8,7 +8,7 @@ import { FoundryLocalManager } from "foundry-local-sdk"; // to your end-user's device. // TIP: You can find a list of available models by running the // following command in your terminal: `foundry model list`. -const alias = "phi-3.5-mini"; +const alias = "qwen2.5-coder-0.5b-instruct-generic-cpu:3"; // Create a FoundryLocalManager instance. This will start the Foundry // Local service if it is not already running. From 155ba3a49a1498dd78bc2a362423135c67634a77 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 29 Jul 2025 11:11:51 -0700 Subject: [PATCH 15/15] Update _get_latest_model_info implementation --- sdk/python/foundry_local/api.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 56a736a4..3c998161 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -256,20 +256,9 @@ def _get_latest_model_info(self, alias_or_model_id: str, raise_on_not_found: boo raise ValueError("The provided nodel alias or ID was empty.") return None - catalog = self._get_catalog_dict() - - # if alias or id without version - if ":" not in alias_or_model_id: - # if alias - if catalog[alias_or_model_id] is not None: - return catalog[alias_or_model_id] - else: - # if id without version, then get_model_info will get the latest version - return self.get_model_info(alias_or_model_id, raise_on_not_found) - else: - # if id with version, remove the ":" suffix and use the name to get the latest model - id_without_version = alias_or_model_id.split(":")[0] - return self.get_model_info(id_without_version, raise_on_not_found) + # 
remove the ":" suffix if it exists, and use it to get the latest model + alias_or_name_without_version = alias_or_model_id.split(":")[0] + return self.get_model_info(alias_or_name_without_version, raise_on_not_found) # Cache management api def get_cache_location(self):