From 1e69f08dc249bcffc9a31963b1bfd7be3d1177f6 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 08:59:31 -0700 Subject: [PATCH 01/15] Add JS isModelUpgradable and upgradeModel --- sdk/js/src/base.ts | 53 ++++++++++++++++++++++++++++++++++++++++++++- sdk/js/src/types.ts | 30 +++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index 60a6ecc0..c7f38c89 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -3,7 +3,7 @@ import * as client from './client.js' import { ExecutionProvider } from './types.js' -import type { DownloadBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' +import type { DownloadBody, UpgradeBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' /** * Utility function to detect if the platform is Windows. @@ -271,6 +271,57 @@ export class FoundryLocalManager { return modelInfo } + + /** + * Checks if a newer version of a model is available. + * @param {string} aliasOrModelId - The alias or model ID. + * @returns {Promise} True if a newer version is available, otherwise false. + */ + async isModelUpgradable(aliasOrModelId: string): Promise { + const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${aliasOrModelId}`) + const data = await response.json() + return data.upgradable + } + + /** + * Downloads the latest version of a model to the local cache. + * @param {string} aliasOrModelId - The alias or model ID. + * @param {string} [token] - Optional token for authentication. + * @param {(progress: number) => void} [onProgress] - Callback for download progress percentage. + * @returns {Promise} The upgraded model information. 
+ */ + async upgradeModel( + aliasOrModelId: string, + token?: string, + onProgress?: (progress: number) => void, + ): Promise { + const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo + + const upgradeBody: UpgradeBody = { + Name: modelInfo.id, + Uri: modelInfo.uri, + Publisher: modelInfo.publisher, + ProviderType: modelInfo.provider === 'AzureFoundry' ? `${modelInfo.provider}Local` : modelInfo.provider, + PromptTemplate: modelInfo.promptTemplate, + } + + const body = { + model: upgradeBody, + ...(token && { token }), + IgnorePipeReport: true, + } + + const data = await client.postWithProgress(this.fetch, `${this.serviceUrl}/openai/upgrade`, body, onProgress) + + if (!data.success) { + throw new Error( + `Failed to upgrade model with alias '${modelInfo.alias}' and ID '${modelInfo.id}': ${data.error}`, + ) + } + + return modelInfo + } + /** * Loads a model. * @param {string} aliasOrModelId - The alias or model ID. diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index ade00e1c..1467bac5 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -219,3 +219,33 @@ export interface DownloadBody { */ PromptTemplate: Record } + +/** + * Interface representing the body of an upgrade request. + */ +export interface UpgradeBody { + /** + * The name of the model. + */ + Name: string + + /** + * The URI of the model. + */ + Uri: string + + /** + * The publisher of the model. + */ + Publisher: string + + /** + * The provider type of the model. + */ + ProviderType: string + + /** + * The prompt template associated with the model. 
+ */ + PromptTemplate: Record +} From 0c73bc87cbf648076aba1294a0a9880bdb5e9b11 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 09:47:41 -0700 Subject: [PATCH 02/15] small updates to JS upgrade API --- sdk/js/src/base.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index c7f38c89..7c6d6adc 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -278,7 +278,9 @@ export class FoundryLocalManager { * @returns {Promise} True if a newer version is available, otherwise false. */ async isModelUpgradable(aliasOrModelId: string): Promise { - const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${aliasOrModelId}`) + const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo + + const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${modelInfo.id}`) const data = await response.json() return data.upgradable } @@ -301,7 +303,7 @@ export class FoundryLocalManager { Name: modelInfo.id, Uri: modelInfo.uri, Publisher: modelInfo.publisher, - ProviderType: modelInfo.provider === 'AzureFoundry' ? `${modelInfo.provider}Local` : modelInfo.provider, + ProviderType: modelInfo.provider === 'AzureFoundry' ? 
`AzureFoundryLocal` : modelInfo.provider, PromptTemplate: modelInfo.promptTemplate, } From a3ba62d64b8dc6e6415ae115a41cad2f3abc8793 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 09:48:07 -0700 Subject: [PATCH 03/15] Add Python upgrade_model and is_model_upgradable APIs --- sdk/python/foundry_local/api.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 92909e88..7d73765d 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -271,6 +271,62 @@ def download_model(self, alias_or_model_id: str, token: str | None = None, force ) return model_info + def is_model_upgradable(self, alias_or_model_id: str) -> bool: + """ + Check if a newer version of a model is available. + + Args: + alias_or_model_id (str): Alias or Model ID. + + Returns: + bool: True if a newer version is available, False otherwise. + + Raises: + ValueError: If the model is not found in the catalog. + """ + model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) + logger.info("Checking if model '%s' (ID: '%s') is upgradable...", model_info.alias, model_info.id) + + response = self.httpx_client.get(f"/foundry/upgradable/{model_info.id}") + return response.get("upgradable", False) + + def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> None: + """ + Download the latest version of a model to the local cache. + + Args: + alias_or_model_id (str): Alias or Model ID. + token (str | None): Optional token for authentication. + + Raises: + ValueError: If the model is not found in the catalog. + RuntimeError: If the model upgrade fails. 
+ """ + model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) + logger.info("Upgrading model with alias '%s' and ID '%s'...", model_info.alias, model_info.id) + + upgrade_body = { + "Name": model_info.id, + "Uri": model_info.uri, + "Publisher": model_info.publisher, + "ProviderType": "AzureFoundryLocal" if model_info.provider == "AzureFoundry" else model_info.provider, + "PromptTemplate": model_info.prompt_template, + } + body={ + "model": upgrade_body, + "token": token, + "IgnorePipeReport": True, + }, + + response_body = self.httpx_client.post_with_progress("/foundry/upgrade", body=body) + + if not response_body.get("success", False): + raise RuntimeError( + f"Failed to upgrade model with error: {response_body.get('errorMessage', 'Unknown error')}" + ) + + return model_info + def load_model(self, alias_or_model_id: str, ttl: int = 600) -> FoundryModelInfo: """ Load a model. From c6c73730cdd090503c093a670b5152ca136a30f9 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 17:28:51 -0700 Subject: [PATCH 04/15] consistent apis --- sdk/js/src/base.ts | 6 +++--- sdk/python/foundry_local/api.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index 7c6d6adc..4b2c0141 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -277,12 +277,12 @@ export class FoundryLocalManager { * @param {string} aliasOrModelId - The alias or model ID. * @returns {Promise} True if a newer version is available, otherwise false. 
*/ - async isModelUpgradable(aliasOrModelId: string): Promise { + async isModelUpgradeable(aliasOrModelId: string): Promise { const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo - const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradable/${modelInfo.id}`) + const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradeable/${modelInfo.id}`) const data = await response.json() - return data.upgradable + return data.upgradeable } /** diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 7d73765d..3a1bec0f 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -271,7 +271,7 @@ def download_model(self, alias_or_model_id: str, token: str | None = None, force ) return model_info - def is_model_upgradable(self, alias_or_model_id: str) -> bool: + def is_model_upgradeable(self, alias_or_model_id: str) -> bool: """ Check if a newer version of a model is available. @@ -285,10 +285,10 @@ def is_model_upgradable(self, alias_or_model_id: str) -> bool: ValueError: If the model is not found in the catalog. 
""" model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) - logger.info("Checking if model '%s' (ID: '%s') is upgradable...", model_info.alias, model_info.id) + logger.info("Checking if model '%s' (ID: '%s') is upgradeable...", model_info.alias, model_info.id) - response = self.httpx_client.get(f"/foundry/upgradable/{model_info.id}") - return response.get("upgradable", False) + response = self.httpx_client.get(f"/openai/upgradeable/{model_info.id}") + return response.get("upgradeable", False) def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> None: """ @@ -318,7 +318,7 @@ def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> Non "IgnorePipeReport": True, }, - response_body = self.httpx_client.post_with_progress("/foundry/upgrade", body=body) + response_body = self.httpx_client.post_with_progress("/openai/upgrade", body=body) if not response_body.get("success", False): raise RuntimeError( From 2a296b3b89a4dfc6674230cb1a41464a918a6750 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 8 Jul 2025 22:19:09 -0700 Subject: [PATCH 05/15] Add C# APIs --- sdk/cs/src/FoundryLocalManager.cs | 62 +++++++++++++++++++++++++++++++ sdk/cs/src/FoundryModelInfo.cs | 29 +++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index f5e3a132..b71cf9a1 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -245,6 +245,68 @@ public async Task> ListCachedModelsAsync(CancellationToken ct = return modelInfo; } + public async Task IsModelUpgradeableAsync(string aliasOrModelId, CancellationToken ct = default) + { + var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) + ?? 
throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); + + await StartServiceAsync(ct); + var response = await _serviceClient!.GetAsync($"/openai/upgradeable/{modelInfo.ModelId}", ct); + response.EnsureSuccessStatusCode(); + + var jsonResponse = await response.Content.ReadAsStringAsync(ct); + using var jsonDoc = JsonDocument.Parse(jsonResponse); + return jsonDoc.RootElement.GetProperty("upgradeable").GetBoolean(); + } + + public async Task UpgradeModelAsync( + string aliasOrModelId, + string? token = null, + CancellationToken ct = default) + { + var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) + ?? throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); + + var request = new UpgradeRequest + { + Model = new UpgradeRequest.UpgradeBody + { + Name = modelInfo.ModelId, + Uri = modelInfo.Uri, + Publisher = modelInfo.Publisher, + ProviderType = modelInfo.ProviderType == "AzureFoundry" ? "AzureFoundryLocal" : modelInfo.ProviderType, + PromptTemplate = modelInfo.PromptTemplate + }, + Token = token ?? 
"", + IgnorePipeReport = true + }; + + var response = await _serviceClient!.PostAsJsonAsync("/openai/upgrade", request, ct); + response.EnsureSuccessStatusCode(); + var responseBody = await response.Content.ReadAsStringAsync(ct); + + // Find the last '{' to get the start of the JSON object + var jsonStart = responseBody.LastIndexOf('{'); + if (jsonStart == -1) + { + throw new InvalidOperationException("No JSON object found in response."); + } + + var jsonPart = responseBody[jsonStart..]; + + // Parse the JSON part + using var jsonDoc = JsonDocument.Parse(jsonPart); + var success = jsonDoc.RootElement.GetProperty("success").GetBoolean(); + var errorMessage = jsonDoc.RootElement.GetProperty("errorMessage").GetString(); + + if (!success) + { + throw new InvalidOperationException($"Failed to download model: {errorMessage}"); + } + + return modelInfo; + } + public async Task LoadModelAsync(string aliasOrModelId, TimeSpan? timeout = null, CancellationToken ct = default) { var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) ?? 
throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); diff --git a/sdk/cs/src/FoundryModelInfo.cs b/sdk/cs/src/FoundryModelInfo.cs index e9f7c6fe..15b537ba 100644 --- a/sdk/cs/src/FoundryModelInfo.cs +++ b/sdk/cs/src/FoundryModelInfo.cs @@ -123,7 +123,36 @@ internal sealed class ModelInfo [JsonPropertyName("IgnorePipeReport")] public required bool IgnorePipeReport { get; set; } +} + +internal sealed class UpgradeRequest +{ + internal sealed class UpgradeBody + { + [JsonPropertyName("Name")] + public required string Name { get; set; } = string.Empty; + + [JsonPropertyName("Uri")] + public required string Uri { get; set; } = string.Empty; + + [JsonPropertyName("Publisher")] + public required string Publisher { get; set; } = string.Empty; + + [JsonPropertyName("ProviderType")] + public required string ProviderType { get; set; } = string.Empty; + [JsonPropertyName("PromptTemplate")] + public required PromptTemplate PromptTemplate { get; set; } + } + + [JsonPropertyName("model")] + public required UpgradeBody Model { get; set; } + + [JsonPropertyName("token")] + public required string Token { get; set; } + + [JsonPropertyName("IgnorePipeReport")] + public required bool IgnorePipeReport { get; set; } } public record ModelDownloadProgress From c8190b477779cc50b431590ec66da67bb415febf Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Wed, 9 Jul 2025 15:53:35 -0700 Subject: [PATCH 06/15] add rust APIs --- sdk/rust/src/api.rs | 95 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/sdk/rust/src/api.rs b/sdk/rust/src/api.rs index f212e578..0adabd29 100644 --- a/sdk/rust/src/api.rs +++ b/sdk/rust/src/api.rs @@ -397,6 +397,101 @@ impl FoundryLocalManager { Ok(model_info) } + /// Checks if a newer version of a model is available. + /// + /// # Arguments + /// + /// * `alias_or_model_id` - The alias or model ID to check for upgrades. 
+ /// + /// # Returns + /// + /// True if a newer version is available, otherwise false. + pub async fn is_model_upgradeable( + &mut self, + alias_or_model_id: &str + ) -> Result { + let model_info = self.get_model_info(alias_or_model_id, true).await?; + let url = format!("/openai/upgradeable/{}", model_info.id); + + let client = self.client()?; + let response: Option = client.get(&url, None).await?; + + let data = response.ok_or_else(|| anyhow!("Failed to check model upgrade availability"))?; + data["upgradeable"] + .as_bool() + .ok_or_else(|| anyhow!("Invalid upgrade response format")) + } + + /// Upgrades a model to its latest version. + /// + /// # Arguments + /// + /// * `alias_or_model_id` - The alias or model ID to upgrade. + /// * `token` - Optional token for authentication. + /// + /// # Returns + /// + /// The upgraded model information. + pub async fn upgrade_model( + &mut self, + alias_or_model_id: &str, + token: Option<&str>, + ) -> Result { + let model_info = self.get_model_info(alias_or_model_id, true).await?; + info!( + "Upgrading model: {} ({})", + model_info.alias, model_info.id + ); + + // Create the upgrade body similar to the JS implementation + let mut body = serde_json::json!({ + "model": { + "Name": model_info.id, + "Uri": model_info.uri, + "Publisher": model_info.publisher, + "ProviderType": if model_info.provider == "AzureFoundry" { + "AzureFoundryLocal" + } else { + model_info.provider + }, + "PromptTemplate": model_info.prompt_template, + }, + "IgnorePipeReport": true + }); + + // Add token if provided + if let Some(t) = token { + body["token"] = Value::String(t.to_string()); + } + + let client = self.client()?; + let response: Value = client + .post_with_progress("/openai/upgrade", Some(body)) + .await?; + + // Check if the upgrade was successful + if !response["success"].as_bool().unwrap_or(false) { + let error_msg = response["error"] + .as_str() + .unwrap_or("Unknown error"); + + return Err(anyhow!( + "Failed to upgrade model with 
alias '{}' and ID '{}': {}", + model_info.alias, + model_info.id, + error_msg + )); + } + + // Refresh the model cache to get the latest information + self.refresh_catalog(); + + // Get the updated model information + let updated_model_info = self.get_model_info(&model_info.id, true).await?; + + Ok(updated_model_info) + } + /// Load a model. /// /// # Arguments From 075e3ed2e2f61a269130697fa93b5a54bcfc6597 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 17:10:04 -0700 Subject: [PATCH 07/15] update python API to support versioning --- sdk/python/foundry_local/__init__.py | 2 +- sdk/python/foundry_local/api.py | 127 +++++++++++++++------ sdk/python/foundry_local/models.py | 2 + sdk/python/test/test_api.py | 161 +++++++++++++++++++-------- sdk/python/test/test_models.py | 4 + 5 files changed, 216 insertions(+), 80 deletions(-) diff --git a/sdk/python/foundry_local/__init__.py b/sdk/python/foundry_local/__init__.py index 2a0a71ff..b3681488 100644 --- a/sdk/python/foundry_local/__init__.py +++ b/sdk/python/foundry_local/__init__.py @@ -20,4 +20,4 @@ __all__ = ["FoundryLocalManager"] -__version__ = "0.3.1" +__version__ = "0.4.0" diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 3a1bec0f..afbd73d0 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -133,6 +133,18 @@ def list_catalog_models(self) -> list[FoundryModelInfo]: ] return self._catalog_list + """ + Extract numeric version from ID (e.g. model-x:3 → 3) + + Returns: + int: Numeric version extracted from the model ID, or -1 if not found. + """ + def _get_version(self, model_id: str) -> int: + try: + return int(model_id.split(":")[-1]) + except (ValueError, IndexError): + return -1 + def _get_catalog_dict(self) -> dict[str, FoundryModelInfo]: """ Get a dictionary of available models. Keyed by model ID and alias. Alias points to the most preferred model. 
@@ -169,6 +181,17 @@ def _get_catalog_dict(self) -> dict[str, FoundryModelInfo]: for alias, candidates in alias_candidates.items(): self._catalog_dict[alias] = min(candidates, key=lambda model: priority_map.get(model.runtime, float("inf"))) + # Choose the best model for each alias based on priority and version + for alias, candidates in alias_candidates.items(): + best_candidate = max( + candidates, + key=lambda m: ( + -priority_map.get(m.runtime, float("inf")), # negate to mimic ascending priority + self._get_version(m.id) # pick the highest version + ) + ) + self._catalog_dict[alias] = best_candidate + return self._catalog_dict def refresh_catalog(self): @@ -178,10 +201,11 @@ def refresh_catalog(self): def get_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = False) -> FoundryModelInfo | None: """ - Get the model information by alias or ID. + Get the model information by of the latest model that matches the given alias or ID. Args: alias_or_model_id (str): Alias or Model ID. If it is an alias, the most preferred model will be returned. + If it is a model ID, it can contain a ":" suffix or not. raise_on_not_found (bool): If True, raise an error if the model is not found. Default is False. Returns: @@ -190,11 +214,62 @@ def get_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = Fals Raises: ValueError: If the model is not found and raise_on_not_found is True. 
""" - model_info = self._get_catalog_dict().get(alias_or_model_id) + catalog = self._get_catalog_dict() + model_info = None + + # id with version, or alias + if alias_or_model_id in catalog: + model_info = catalog[alias_or_model_id] + elif ":" not in alias_or_model_id: + # alias_or_model_id is an id that does not contain a version + prefix = f"{alias_or_model_id}:" + best_version = -1 + + for key, info in catalog.items(): + if key.startswith(prefix): + try: + version = self._get_version(key) + if version > best_version: + best_version = version + model_info = info + except ValueError: + continue # Skip if version is not numeric + if model_info is None and raise_on_not_found: raise ValueError(f"Model {alias_or_model_id} not found in the catalog.") return model_info + def _get_latest_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = False) -> FoundryModelInfo | None: + """ + Get the latest model information by alias or model ID. + The difference from get_model_info is that this method will return the latest version of the model + even when you pass it a model id that contains a version suffix. + + Args: + alias_or_model_id (str): Alias or Model ID. If it is an alias, the most preferred model will be returned. + raise_on_not_found (bool): If True, raise an error if the model is not found. Default is False. + + Returns: + FoundryModelInfo | None: Latest model information or None if not found. + + Raises: + ValueError: If the model is not found and raise_on_not_found is True. 
+ """ + catalog = self._get_catalog_dict() + + # if alias or id without version + if ":" not in alias_or_model_id: + # if alias + if catalog[alias_or_model_id] is not None: + return catalog[alias_or_model_id] + else: + # if id without version, then get_model_info will get the latest version + return self.get_model_info(alias_or_model_id, raise_on_not_found) + else: + # if id with version, remove the ":" suffix and use the name to get the latest model + id_without_version = alias_or_model_id.split(":")[0] + return self.get_model_info(id_without_version, raise_on_not_found) + # Cache management api def get_cache_location(self): """ @@ -284,48 +359,34 @@ def is_model_upgradeable(self, alias_or_model_id: str) -> bool: Raises: ValueError: If the model is not found in the catalog. """ - model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) - logger.info("Checking if model '%s' (ID: '%s') is upgradeable...", model_info.alias, model_info.id) + logger.info("Checking if model '%s' is upgradeable...", alias_or_model_id) + model_info = self._get_latest_model_info(alias_or_model_id, raise_on_not_found=True) + if model_info is None: + return False # Model not found in the catalog + + latest_version = self._get_version(model_info.id) + if latest_version == -1: + return False # Invalid model ID format - response = self.httpx_client.get(f"/openai/upgradeable/{model_info.id}") - return response.get("upgradeable", False) + cached_models = self.list_cached_models() + for cached_model in cached_models: + if cached_model.id == model_info.id and self._get_version(cached_model.id) == latest_version: + return False # Model is already the latest version + + return True # The latest version is not in the cache def upgrade_model(self, alias_or_model_id: str, token: str | None = None) -> None: """ - Download the latest version of a model to the local cache. - + Download the latest version of a model to the local cache, if the latest version is not already cached. 
Args: alias_or_model_id (str): Alias or Model ID. token (str | None): Optional token for authentication. - Raises: ValueError: If the model is not found in the catalog. RuntimeError: If the model upgrade fails. """ - model_info = self.get_model_info(alias_or_model_id, raise_on_not_found=True) - logger.info("Upgrading model with alias '%s' and ID '%s'...", model_info.alias, model_info.id) - - upgrade_body = { - "Name": model_info.id, - "Uri": model_info.uri, - "Publisher": model_info.publisher, - "ProviderType": "AzureFoundryLocal" if model_info.provider == "AzureFoundry" else model_info.provider, - "PromptTemplate": model_info.prompt_template, - } - body={ - "model": upgrade_body, - "token": token, - "IgnorePipeReport": True, - }, - - response_body = self.httpx_client.post_with_progress("/openai/upgrade", body=body) - - if not response_body.get("success", False): - raise RuntimeError( - f"Failed to upgrade model with error: {response_body.get('errorMessage', 'Unknown error')}" - ) - - return model_info + model_info = self._get_latest_model_info(alias_or_model_id, raise_on_not_found=True) + return self.download_model(model_info.id, token=token) def load_model(self, alias_or_model_id: str, ttl: int = 600) -> FoundryModelInfo: """ diff --git a/sdk/python/foundry_local/models.py b/sdk/python/foundry_local/models.py index 5168adaa..c4018c07 100644 --- a/sdk/python/foundry_local/models.py +++ b/sdk/python/foundry_local/models.py @@ -78,6 +78,8 @@ class FoundryListResponseModel(BaseModel): license: str = Field(..., description="License of the model") licenseDescription: str = Field(..., description="License description of the model") parentModelUri: str = Field(..., description="Parent model URI of the model") + maxOutputTokens: int = Field(..., description="Maximum output tokens for the model") + minFLVersion: str = Field(..., description="Minimum Foundry Local version required for the model") class FoundryModelInfo(BaseModel): diff --git a/sdk/python/test/test_api.py 
b/sdk/python/test/test_api.py index 707941af..a301a2fc 100644 --- a/sdk/python/test/test_api.py +++ b/sdk/python/test/test_api.py @@ -24,13 +24,15 @@ "supportsToolCalling": False, "license": "MIT", "licenseDescription": "This model is provided under the License Terms available at ...", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0", } # Sample catalog with 3 aliases with different combos MOCK_CATALOG_DATA = [ # generic-gpu, generic-cpu { - "name": "model-1-generic-gpu", + "name": "model-1-generic-gpu:1", "displayName": "model-1-generic-gpu", "uri": "azureml://registries/azureml/models/model-1-generic-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "WebGpuExecutionProvider"}, @@ -39,7 +41,7 @@ **MOCK_INFO, }, { - "name": "model-1-generic-cpu", + "name": "model-1-generic-cpu:1", "displayName": "model-1-generic-cpu", "uri": "azureml://registries/azureml/models/model-1-generic-cpu/versions/1", "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, @@ -47,9 +49,18 @@ "parentModelUri": "azureml://registries/azureml/models/model-1/versions/1", **MOCK_INFO, }, + { + "name": "model-1-generic-cpu:2", + "displayName": "model-1-generic-cpu", + "uri": "azureml://registries/azureml/models/model-1-generic-cpu/versions/2", + "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, + "alias": "model-1", + "parentModelUri": "azureml://registries/azureml/models/model-1/versions/2", + **MOCK_INFO, + }, # npu, generic-cpu { - "name": "model-2-npu", + "name": "model-2-npu:1", "displayName": "model-2-npu", "uri": "azureml://registries/azureml/models/model-2-npu/versions/1", "runtime": {"deviceType": "NPU", "executionProvider": "QNNExecutionProvider"}, @@ -58,7 +69,16 @@ **MOCK_INFO, }, { - "name": "model-2-generic-cpu", + "name": "model-2-npu:2", + "displayName": "model-2-npu", + "uri": "azureml://registries/azureml/models/model-2-npu/versions/2", + "runtime": {"deviceType": "NPU", "executionProvider": 
"QNNExecutionProvider"}, + "alias": "model-2", + "parentModelUri": "azureml://registries/azureml/models/model-2/versions/2", + **MOCK_INFO, + }, + { + "name": "model-2-generic-cpu:1", "displayName": "model-2-generic-cpu", "uri": "azureml://registries/azureml/models/model-2-generic-cpu/versions/1", "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, @@ -68,7 +88,7 @@ }, # cuda-gpu, generic-gpu, generic-cpu { - "name": "model-3-cuda-gpu", + "name": "model-3-cuda-gpu:1", "displayName": "model-3-cuda-gpu", "uri": "azureml://registries/azureml/models/model-3-cuda-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "CUDAExecutionProvider"}, @@ -77,7 +97,7 @@ **MOCK_INFO, }, { - "name": "model-3-generic-gpu", + "name": "model-3-generic-gpu:1", "displayName": "model-3-generic-gpu", "uri": "azureml://registries/azureml/models/model-3-generic-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "WebGpuExecutionProvider"}, @@ -86,7 +106,7 @@ **MOCK_INFO, }, { - "name": "model-3-generic-cpu", + "name": "model-3-generic-cpu:1", "displayName": "model-3-generic-cpu", "uri": "azureml://registries/azureml/models/model-3-generic-cpu/versions/1", "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, @@ -96,7 +116,7 @@ }, # generic-cpu { - "name": "model-4-generic-gpu", + "name": "model-4-generic-gpu:1", "displayName": "model-4-generic-gpu", "uri": "azureml://registries/azureml/models/model-4-generic-gpu/versions/1", "runtime": {"deviceType": "GPU", "executionProvider": "WebGpuExecutionProvider"}, @@ -110,10 +130,10 @@ MOCK_STATUS_RESPONSE = {"modelDirPath": "/test/path/to/models"} # Mock response for /openai/models -MOCK_LOCAL_MODELS = ["model-2-npu", "model-4-generic-gpu"] +MOCK_LOCAL_MODELS = ["model-2-npu:1", "model-4-generic-gpu:1"] # Mock response for /openai/loadedmodels -MOCK_LOADED_MODELS = ["model-2-npu"] +MOCK_LOADED_MODELS = ["model-2-npu:1"] @pytest.fixture(scope="module", 
autouse=True) @@ -174,7 +194,7 @@ def test_initialization(mock_http_client): # Test with bootstrap and model_id with mock.patch("foundry_local.api.start_service") as mock_start: mock_start.return_value = "http://localhost:5272" - manager = FoundryLocalManager(alias_or_model_id="model-2", bootstrap=True) + manager = FoundryLocalManager(alias_or_model_id="model-4", bootstrap=True) mock_start.assert_called_once() mock_http_client.get.assert_any_call("/foundry/list") # in local models @@ -186,21 +206,25 @@ def test_list_catalog_models(mock_http_client): manager = FoundryLocalManager(bootstrap=False) models = manager.list_catalog_models() mock_http_client.get.assert_called_once_with("/foundry/list") - assert len(models) == 8 + assert len(models) == len(MOCK_CATALOG_DATA) assert all(isinstance(model, FoundryModelInfo) for model in models) assert [model.id for model in models] == [ - "model-1-generic-gpu", - "model-1-generic-cpu", - "model-2-npu", - "model-2-generic-cpu", - "model-3-cuda-gpu", - "model-3-generic-gpu", - "model-3-generic-cpu", - "model-4-generic-gpu", + "model-1-generic-gpu:1", + "model-1-generic-cpu:1", + "model-1-generic-cpu:2", + "model-2-npu:1", + "model-2-npu:2", + "model-2-generic-cpu:1", + "model-3-cuda-gpu:1", + "model-3-generic-gpu:1", + "model-3-generic-cpu:1", + "model-4-generic-gpu:1", ] assert [model.alias for model in models] == [ "model-1", "model-1", + "model-1", + "model-2", "model-2", "model-2", "model-3", @@ -231,18 +255,20 @@ def test_get_model_info(platform, mock_http_client): with pytest.raises(ValueError): manager.get_model_info("unknown-model", raise_on_not_found=True) - # with id - assert manager.get_model_info("model-1-generic-cpu").id == "model-1-generic-cpu" + # with id that contains version + assert manager.get_model_info("model-1-generic-cpu:1").id == "model-1-generic-cpu:1" + + # with id that does not contain version + assert manager.get_model_info("model-1-generic-cpu").id == "model-1-generic-cpu:2" # with alias # 
generic-cpu preferred on Windows assert ( - manager.get_model_info("model-1").id == "model-1-generic-cpu" - if platform == "Windows" - else "model-1-generic-gpu" + manager.get_model_info("model-1").id == "model-1-generic-cpu:2" if platform == "Windows" + else "model-1-generic-gpu:1" ) - assert manager.get_model_info("model-2").id == "model-2-npu" - assert manager.get_model_info("model-3").id == "model-3-cuda-gpu" + assert manager.get_model_info("model-2").id == "model-2-npu:2" # latest version, even if not in cache + assert manager.get_model_info("model-3").id == "model-3-cuda-gpu:1" def test_list_cached_models(mock_http_client): @@ -250,8 +276,8 @@ def test_list_cached_models(mock_http_client): manager = FoundryLocalManager(bootstrap=False) local_models = manager.list_cached_models() assert len(local_models) == 2 - assert local_models[0].id == "model-2-npu" - assert local_models[1].id == "model-4-generic-gpu" + assert local_models[0].id == "model-2-npu:1" + assert local_models[1].id == "model-4-generic-gpu:1" def test_list_loaded_models(mock_http_client): @@ -259,7 +285,7 @@ def test_list_loaded_models(mock_http_client): manager = FoundryLocalManager(bootstrap=False) loaded_models = manager.list_loaded_models() assert len(loaded_models) == 1 - assert loaded_models[0].id == "model-2-npu" + assert loaded_models[0].id == "model-2-npu:1" def test_download_model(mock_http_client): @@ -268,20 +294,24 @@ def test_download_model(mock_http_client): # Test downloading a new model model_info = manager.download_model("model-3") - assert model_info.id == "model-3-cuda-gpu" + assert model_info.id == "model-3-cuda-gpu:1" mock_http_client.post_with_progress.assert_called_once() - - # Reset mock for next test - mock_http_client.post_with_progress.reset_mock() + mock_http_client.post_with_progress.reset_mock() # Reset mock for next test # Test downloading an already cached model - model_info = manager.download_model("model-2") - assert model_info.id == "model-2-npu" + model_info = 
manager.download_model("model-2-npu:1")
+    assert model_info.id == "model-2-npu:1"
     mock_http_client.post_with_progress.assert_not_called()
 
+    # Test download a model that is not at the latest version
+    model_info = manager.download_model("model-2")
+    assert model_info.id == "model-2-npu:2"
+    mock_http_client.post_with_progress.assert_called_once()
+    mock_http_client.post_with_progress.reset_mock()  # Reset mock for next test
+
     # Test force download
     model_info = manager.download_model("model-2", force=True)
-    assert model_info.id == "model-2-npu"
+    assert model_info.id == "model-2-npu:2"
     mock_http_client.post_with_progress.assert_called_once()
 
     # Test download failure
@@ -290,20 +320,59 @@ def test_download_model(mock_http_client):
             manager.download_model("model-1")
 
 
+def test_is_model_upgradeable(mock_http_client):
+    manager = FoundryLocalManager(bootstrap=False)
+
+    # Not in cache, even if at the latest version
+    assert manager.is_model_upgradeable("model-1") is True
+    assert manager.is_model_upgradeable("model-1-generic-cpu:1") is True
+    assert manager.is_model_upgradeable("model-1-generic-cpu:2") is True
+    assert manager.is_model_upgradeable("model-1-generic-gpu:1") is True
+    assert manager.is_model_upgradeable("model-2-npu:2") is True
+
+    # In cache, at the latest version
+    assert manager.is_model_upgradeable("model-4-generic-gpu:1") is False
+
+    # In cache, not at the latest version
+    assert manager.is_model_upgradeable("model-2-npu:1") is True
+
+
+def test_upgrade_model(mock_http_client):
+    """Test upgrading a model."""
+    manager = FoundryLocalManager(bootstrap=False)
+
+    # Test upgrading a model that is not in the cache at all
+    model_info = manager.upgrade_model("model-3")
+    assert model_info.id == "model-3-cuda-gpu:1"
+    mock_http_client.post_with_progress.assert_called_once()
+    mock_http_client.post_with_progress.reset_mock()  # Reset mock for next test
+
+    # Test upgrading a model that has an older version in the cache
+    model_info = 
manager.upgrade_model("model-2-npu:1") + assert model_info.id == "model-2-npu:2" + mock_http_client.post_with_progress.assert_called_once() + mock_http_client.post_with_progress.reset_mock() # Reset mock for next test + + # Test upgrading a model that has the latest version in the cache + model_info = manager.upgrade_model("model-4") + assert model_info.id == "model-4-generic-gpu:1" + mock_http_client.post_with_progress.assert_not_called() + + def test_load_model(mock_http_client): """Test loading a model.""" manager = FoundryLocalManager(bootstrap=False) # already loaded model model_info = manager.load_model("model-2") - assert model_info.id == "model-2-npu" - mock_http_client.get.assert_any_call("/openai/load/model-2-npu", query_params={"ttl": 600}) + assert model_info.id == "model-2-npu:2" + mock_http_client.get.assert_any_call("/openai/load/model-2-npu:2", query_params={"ttl": 600}) # not loaded model model_info = manager.load_model("model-4") - assert model_info.id == "model-4-generic-gpu" + assert model_info.id == "model-4-generic-gpu:1" # ep override, should be cuda since there is cuda support - mock_http_client.get.assert_any_call("/openai/load/model-4-generic-gpu", query_params={"ttl": 600, "ep": "cuda"}) + mock_http_client.get.assert_any_call("/openai/load/model-4-generic-gpu:1", query_params={"ttl": 600, "ep": "cuda"}) # Test loading a non-downloaded model def mock_get(path, query_params=None): @@ -319,17 +388,17 @@ def test_unload_model(mock_http_client): manager = FoundryLocalManager(bootstrap=False) # Test unloading a loaded model - manager.unload_model("model-2") - mock_http_client.get.assert_any_call("/openai/unload/model-2-npu", query_params={"force": False}) + manager.unload_model("model-2-npu:1") + mock_http_client.get.assert_any_call("/openai/unload/model-2-npu:1", query_params={"force": False}) # Test unloading a model that's not loaded mock_http_client.get.reset_mock() manager.unload_model("model-4") assert ( - 
mock.call("/openai/unload/model-4-generic-gpu", query_params={"force": False}) + mock.call("/openai/unload/model-4-generic-gpu:1", query_params={"force": False}) not in mock_http_client.get.call_args_list ) # Test force unloading - manager.unload_model("model-2", force=True) - mock_http_client.get.assert_any_call("/openai/unload/model-2-npu", query_params={"force": True}) + manager.unload_model("model-2-npu:1", force=True) + mock_http_client.get.assert_any_call("/openai/unload/model-2-npu:1", query_params={"force": True}) diff --git a/sdk/python/test/test_models.py b/sdk/python/test/test_models.py index b1feca29..a6d8a779 100644 --- a/sdk/python/test/test_models.py +++ b/sdk/python/test/test_models.py @@ -54,6 +54,8 @@ def test_foundry_list_response_model(): "license": "MIT", "licenseDescription": "Test license", "parentModelUri": "azureml://parent", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0" } model = FoundryListResponseModel.model_validate(response_data) @@ -134,6 +136,8 @@ def test_from_list_response(): "license": "MIT", "licenseDescription": "Test license", "parentModelUri": "azureml://parent", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0", } # Test with dict From 88e94c678ce456cef37f56efb8627bc2d3672619 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 19:06:23 -0700 Subject: [PATCH 08/15] remove redundant code --- sdk/python/foundry_local/api.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index afbd73d0..56a736a4 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -177,10 +177,6 @@ def _get_catalog_dict(self) -> dict[str, FoundryModelInfo]: priority_map = {provider: index for index, provider in enumerate(preferred_order)} - # Choose the preferred model for each alias - for alias, candidates in alias_candidates.items(): - self._catalog_dict[alias] = min(candidates, key=lambda model: 
priority_map.get(model.runtime, float("inf")))
-
         # Choose the best model for each alias based on priority and version
         for alias, candidates in alias_candidates.items():
             best_candidate = max(
@@ -201,7 +197,7 @@ def refresh_catalog(self):
 
     def get_model_info(self, alias_or_model_id: str, raise_on_not_found: bool = False) -> FoundryModelInfo | None:
         """
-        Get the model information by of the latest model that matches the given alias or ID.
+        Get the model information of the latest model that matches the given alias or ID.
 
         Args:
             alias_or_model_id (str): Alias or Model ID. If it is an alias, the most preferred model will be returned.
@@ -255,6 +251,11 @@ def _get_latest_model_info(self, alias_or_model_id: str, raise_on_not_found: boo
         Raises:
             ValueError: If the model is not found and raise_on_not_found is True.
         """
+        if not alias_or_model_id:
+            if raise_on_not_found:
+                raise ValueError("The provided model alias or ID was empty.")
+            return None
+
         catalog = self._get_catalog_dict()
 
         # if alias or id without version
@@ -371,7 +372,7 @@ def is_model_upgradeable(self, alias_or_model_id: str) -> bool:
         cached_models = self.list_cached_models()
         for cached_model in cached_models:
             if cached_model.id == model_info.id and self._get_version(cached_model.id) == latest_version:
-                return False  # Model is already the latest version
+                return False  # Cached model is already at the latest version
 
         return True  # The latest version is not in the cache
 
From ff703993688ef3205bf817579bdc16201e27fc54 Mon Sep 17 00:00:00 2001
From: Alex Marin
Date: Sun, 27 Jul 2025 20:08:49 -0700
Subject: [PATCH 09/15] update JS API to support versioning

---
 sdk/js/package-lock.json |   4 +-
 sdk/js/package.json      |   2 +-
 sdk/js/src/base.ts       | 143 ++++++++++++++++++-------
 sdk/js/src/types.ts      |  40 ++-----
 sdk/js/test/base.test.ts | 219 +++++++++++++++++++++++++++++++++++----
 5 files changed, 314 insertions(+), 94 deletions(-)

diff --git a/sdk/js/package-lock.json b/sdk/js/package-lock.json
index 46a7cd26..bfd3da47 100644
--- a/sdk/js/package-lock.json +++ b/sdk/js/package-lock.json @@ -1,12 +1,12 @@ { "name": "foundry-local-sdk", - "version": "0.3.0", + "version": "0.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "foundry-local-sdk", - "version": "0.3.0", + "version": "0.4.0", "license": "MIT", "dependencies": { "whatwg-fetch": "^3.6.20" diff --git a/sdk/js/package.json b/sdk/js/package.json index e1999e8e..3bb2f510 100644 --- a/sdk/js/package.json +++ b/sdk/js/package.json @@ -1,6 +1,6 @@ { "name": "foundry-local-sdk", - "version": "0.3.1", + "version": "0.4.0", "description": "Foundry Local Manager Javascript SDK", "repository": { "type": "git", diff --git a/sdk/js/src/base.ts b/sdk/js/src/base.ts index 4b2c0141..7fcf70de 100644 --- a/sdk/js/src/base.ts +++ b/sdk/js/src/base.ts @@ -3,7 +3,7 @@ import * as client from './client.js' import { ExecutionProvider } from './types.js' -import type { DownloadBody, UpgradeBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' +import type { DownloadBody, Fetch, FoundryModelInfo, FoundryListResponseModel } from './types.js' /** * Utility function to detect if the platform is Windows. @@ -112,6 +112,20 @@ export class FoundryLocalManager { return this.catalogList } + /** + * Extracts numeric version from ID (e.g. model-x:3 → 3) + * @returns {number} Numeric version extracted from the model ID, or -1 if not found. + */ + private getVersion(modelId: string): number { + try { + const versionStr = modelId.split(":")[1]; + const version = parseInt(versionStr, 10); + return isNaN(version) ? -1 : version; + } catch { + return -1; + } + } + /** * Gets the catalog record. * @returns {Promise>} The catalog record. @@ -149,12 +163,18 @@ export class FoundryLocalManager { // Choose the preferred model for each alias Object.entries(aliasCandidates).forEach(([alias, candidates]) => { const bestCandidate = candidates.reduce((best, current) => { - const bestPriority = priorityMap.get(best.runtime) ?? 
Infinity - const currentPriority = priorityMap.get(current.runtime) ?? Infinity - return currentPriority < bestPriority ? current : best + const bestPriority = -(priorityMap.get(best.runtime) ?? Infinity) + const currentPriority = -(priorityMap.get(current.runtime) ?? Infinity) + + const bestVersion = this.getVersion(best.id) + const currentVersion = this.getVersion(current.id) + + if (currentPriority > bestPriority || (currentPriority === bestPriority && currentVersion > bestVersion)) { + return current + } + return best }) - // Explicitly assign the best candidate to avoid null/undefined issues if (this.catalogRecord) { this.catalogRecord[alias] = bestCandidate } @@ -173,18 +193,73 @@ export class FoundryLocalManager { } /** - * Gets model information by alias or ID. + * Gets the model information of the latest model that matches the given alias or ID. * @param {string} aliasOrModelId - The alias or model ID. * @param {boolean} throwOnNotFound - Whether to throw an error if the model is not found. * @returns {Promise} The model information or null if not found. 
*/ async getModelInfo(aliasOrModelId: string, throwOnNotFound = false): Promise { - const catalogRecord = await this.getCatalogRecord() - const modelInfo = catalogRecord[aliasOrModelId] + const catalog = await this.getCatalogRecord() + let modelInfo: FoundryModelInfo | null = null; + + // Exact match (ID with version or alias) + if (aliasOrModelId in catalog) { + modelInfo = catalog[aliasOrModelId]; + } else if (!aliasOrModelId.includes(":")) { + // ID without version — find the latest version + const prefix = `${aliasOrModelId}:`; + let bestVersion = -1; + + for (const key of Object.keys(catalog)) { + if (key.startsWith(prefix)) { + const version = this.getVersion(key); + if (version > bestVersion) { + bestVersion = version; + modelInfo = catalog[key]; + } + } + } + } + if (!modelInfo && throwOnNotFound) { - throw new Error(`Model with alias or ID ${aliasOrModelId} not found in the catalog`) + throw new Error(`Model with alias or ID '${aliasOrModelId}' not found in the catalog`); } - return modelInfo ?? null + + return modelInfo; + } + + /** + * Gets the latest model information by alias or model ID. + * The difference from getModelInfo is that this method will return the latest version of the model + * even when you pass it a model id that contains a version suffix. + * @param {string} aliasOrModelId - The alias or model ID. + * @param {boolean} throwOnNotFound - Whether to throw an error if the model is not found. + * @returns {Promise} The model information or null if not found. 
+ */ + private async getLatestModelInfo(aliasOrModelId: string, throwOnNotFound = false): Promise { + if (!aliasOrModelId) { + if (throwOnNotFound) { + throw new Error('The provided model alias or ID was empty.'); + } + return null; + } + + const catalog = await this.getCatalogRecord(); + + // alias or ID without version + if (!aliasOrModelId.includes(":")) { + const model = catalog[aliasOrModelId]; + if (model) { + return model; + } + + // if ID without version, then getModelInfo will get the latest version + return await this.getModelInfo(aliasOrModelId, throwOnNotFound); + } + + // if ID with version, remove the ":" suffix and use the name to get the latest model + const idWithoutVersion = aliasOrModelId.split(":")[0]; + return await this.getModelInfo(idWithoutVersion, throwOnNotFound); } /** @@ -278,11 +353,24 @@ export class FoundryLocalManager { * @returns {Promise} True if a newer version is available, otherwise false. */ async isModelUpgradeable(aliasOrModelId: string): Promise { - const modelInfo = (await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo + const modelInfo = await this.getLatestModelInfo(aliasOrModelId, true); + if (!modelInfo) { + return false; // Model not found in the catalog + } - const response = await client.get(this.fetch, `${this.serviceUrl}/openai/upgradeable/${modelInfo.id}`) - const data = await response.json() - return data.upgradeable + const latestVersion = this.getVersion(modelInfo.id); + if (latestVersion === -1) { + return false; // Invalid version format + } + + const cachedModels = await this.listCachedModels(); + for (const cached of cachedModels) { + if (cached.id === modelInfo.id && this.getVersion(cached.id) === latestVersion) { + return false; // Cached model is already at the latest version + } + } + + return true; // The latest version is not in the cache } /** @@ -297,31 +385,8 @@ export class FoundryLocalManager { token?: string, onProgress?: (progress: number) => void, ): Promise { - const modelInfo = 
(await this.getModelInfo(aliasOrModelId, true)) as FoundryModelInfo - - const upgradeBody: UpgradeBody = { - Name: modelInfo.id, - Uri: modelInfo.uri, - Publisher: modelInfo.publisher, - ProviderType: modelInfo.provider === 'AzureFoundry' ? `AzureFoundryLocal` : modelInfo.provider, - PromptTemplate: modelInfo.promptTemplate, - } - - const body = { - model: upgradeBody, - ...(token && { token }), - IgnorePipeReport: true, - } - - const data = await client.postWithProgress(this.fetch, `${this.serviceUrl}/openai/upgrade`, body, onProgress) - - if (!data.success) { - throw new Error( - `Failed to upgrade model with alias '${modelInfo.alias}' and ID '${modelInfo.id}': ${data.error}`, - ) - } - - return modelInfo + const modelInfo = await this.getLatestModelInfo(aliasOrModelId, true) as FoundryModelInfo; + return this.downloadModel(modelInfo.id, token, false, onProgress) } /** diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index 1467bac5..8d4e852a 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -128,6 +128,16 @@ export interface FoundryListResponseModel { * The URI of the parent model. */ parentModelUri: string + + /** + * The maximum number of output tokens. + */ + maxOutputTokens: number + + /** + * The minimum Foundry Local version required to use this model. + */ + minFLVersion: string } /** @@ -219,33 +229,3 @@ export interface DownloadBody { */ PromptTemplate: Record } - -/** - * Interface representing the body of an upgrade request. - */ -export interface UpgradeBody { - /** - * The name of the model. - */ - Name: string - - /** - * The URI of the model. - */ - Uri: string - - /** - * The publisher of the model. - */ - Publisher: string - - /** - * The provider type of the model. - */ - ProviderType: string - - /** - * The prompt template associated with the model. 
- */ - PromptTemplate: Record -} diff --git a/sdk/js/test/base.test.ts b/sdk/js/test/base.test.ts index 00508415..8a5ab221 100644 --- a/sdk/js/test/base.test.ts +++ b/sdk/js/test/base.test.ts @@ -60,7 +60,7 @@ describe('FoundryLocalManager', () => { const mockResponse = { json: vi.fn().mockResolvedValue([ { - name: 'model_name', + name: 'model_name:1', displayName: 'model_name', modelType: 'ONNX', providerType: 'AzureFoundry', @@ -77,6 +77,8 @@ describe('FoundryLocalManager', () => { license: 'MIT', licenseDescription: 'This model is provided under the License Terms available at ...', parentModelUri: 'azureml://registries/azureml/models/model_parent/versions/1', + maxOutputTokens: 1024, + minFLVersion: '1.0.0', }, ]), } @@ -89,7 +91,7 @@ describe('FoundryLocalManager', () => { expect(models).toHaveLength(1) expect(models[0]).toEqual({ alias: 'model_alias', - id: 'model_name', + id: 'model_name:1', version: '1', runtime: ExecutionProvider.CPU, uri: 'azureml://registries/azureml/models/model_name/versions/1', @@ -107,7 +109,7 @@ describe('FoundryLocalManager', () => { manager['catalogList'] = [ { alias: 'model_alias', - id: 'model_name', + id: 'model_name:1', version: '1', runtime: ExecutionProvider.CPU, uri: 'azureml://registries/azureml/models/model_name/versions/1', @@ -146,45 +148,55 @@ describe('FoundryLocalManager', () => { manager['catalogList'] = [ // eneric-gpu, generic-cpu { - id: 'model-1-generic-gpu', + id: 'model-1-generic-gpu:1', runtime: ExecutionProvider.WEBGPU, alias: 'model-1', } as any, { - id: 'model-1-generic-cpu', + id: 'model-1-generic-cpu:1', + runtime: ExecutionProvider.CPU, + alias: 'model-1', + }, + { + id: 'model-1-generic-cpu:2', runtime: ExecutionProvider.CPU, alias: 'model-1', }, // npu, generic-cpu { - id: 'model-2-npu', + id: 'model-2-npu:1', runtime: ExecutionProvider.QNN, alias: 'model-2', }, { - id: 'model-2-generic-cpu', + id: 'model-2-npu:2', + runtime: ExecutionProvider.QNN, + alias: 'model-2', + }, + { + id: 
'model-2-generic-cpu:1', runtime: ExecutionProvider.CPU, alias: 'model-2', }, // cuda-gpu, generic-gpu, generic-cpu { - id: 'model-3-cuda-gpu', + id: 'model-3-cuda-gpu:1', runtime: ExecutionProvider.CUDA, alias: 'model-3', }, { - id: 'model-3-generic-gpu', + id: 'model-3-generic-gpu:1', runtime: ExecutionProvider.WEBGPU, alias: 'model-3', }, { - id: 'model-3-generic-cpu', + id: 'model-3-generic-cpu:1', runtime: ExecutionProvider.CPU, alias: 'model-3', }, // generic-cpu { - id: 'model-4-generic-cpu', + id: 'model-4-generic-cpu:1', runtime: ExecutionProvider.CPU, alias: 'model-4', }, @@ -192,26 +204,26 @@ describe('FoundryLocalManager', () => { }) it('should return model info by id', async () => { - expect((await manager.getModelInfo('model-1-generic-gpu'))?.id).toBe('model-1-generic-gpu') - expect((await manager.getModelInfo('model-1-generic-cpu'))?.id).toBe('model-1-generic-cpu') + expect((await manager.getModelInfo('model-1-generic-gpu'))?.id).toBe('model-1-generic-gpu:1') + expect((await manager.getModelInfo('model-1-generic-cpu'))?.id).toBe('model-1-generic-cpu:2') }) it('should return model info by alias on Windows', async () => { vi.spyOn(process, 'platform', 'get').mockReturnValue('win32') - expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-cpu') // cpu is preferred over webgpu - expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu') // npu most preferred - expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu') // cuda most preferred - expect((await manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu') // generic-cpu + expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-cpu:2') // cpu is preferred over webgpu + expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu:2') // npu most preferred + expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu:1') // cuda most preferred + expect((await 
manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu:1') // generic-cpu }) it('should return model info by alias on non-Windows', async () => { vi.spyOn(process, 'platform', 'get').mockReturnValue('linux') - expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-gpu') // webgpu is preferred over cpu - expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu') // npu most preferred - expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu') // cuda most preferred - expect((await manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu') // generic-cpu + expect((await manager.getModelInfo('model-1'))?.id).toBe('model-1-generic-gpu:1') // webgpu is preferred over cpu + expect((await manager.getModelInfo('model-2'))?.id).toBe('model-2-npu:2') // npu most preferred + expect((await manager.getModelInfo('model-3'))?.id).toBe('model-3-cuda-gpu:1') // cuda most preferred + expect((await manager.getModelInfo('model-4'))?.id).toBe('model-4-generic-cpu:1') // generic-cpu }) it('should return null for non-existent model', async () => { @@ -222,7 +234,7 @@ describe('FoundryLocalManager', () => { it('should throw error for non-existent model when throwOnNotFound is true', async () => { await expect(manager.getModelInfo('non_existent', true)).rejects.toThrow( - 'Model with alias or ID non_existent not found in the catalog', + 'Model with alias or ID \'non_existent\' not found in the catalog', ) }) }) @@ -384,6 +396,169 @@ describe('FoundryLocalManager', () => { }) }) + describe('isModelUpgradeable', () => { + it('returns true if model is not cached', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-3-cuda-gpu:1', + alias: 'model-3', + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([]) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-3-cuda-gpu:1', + alias: 'model-3', + runtime: 'CUDAExecutionProvider', + } as any, + ]) + + 
const result = await manager.isModelUpgradeable('model-3') + expect(result).toBe(true) + }) + + it('returns true if model is cached but older version', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-2-npu:2', + alias: 'model-2', + runtime: ExecutionProvider.QNN, + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([ + { id: 'model-2-npu:1' } as any, + ]) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-2-npu:2', + alias: 'model-2', + runtime: ExecutionProvider.QNN, + } as any, + ]) + + const result = await manager.isModelUpgradeable('model-2-npu:1') + expect(result).toBe(true) + }) + + it('returns false if model is cached and latest version', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-4-generic-gpu:1', + alias: 'model-4', + runtime: ExecutionProvider.WEBGPU, + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([ + { id: 'model-4-generic-gpu:1' } as any, + ]) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-4-generic-gpu:1', + alias: 'model-4', + runtime: ExecutionProvider.WEBGPU, + } as any, + ]) + + const result = await manager.isModelUpgradeable('model-4') + expect(result).toBe(false) + }) + + it('returns false if model version is invalid', async () => { + vi.spyOn(manager, 'getModelInfo').mockResolvedValue({ + id: 'model-invalid-version', + alias: 'model-invalid', + runtime: ExecutionProvider.CPU, + } as any) + + vi.spyOn(manager, 'listCachedModels').mockResolvedValue([]) + + // simulate getVersion returning -1 + vi.spyOn(manager as any, 'getVersion').mockReturnValue(-1) + + vi.spyOn(manager, 'listCatalogModels').mockResolvedValue([ + { + id: 'model-invalid-version', + alias: 'model-invalid', + runtime: ExecutionProvider.CPU, + } as any, + ]) + + const result = await manager.isModelUpgradeable('model-invalid-version') + expect(result).toBe(false) + }) + }) + + describe('upgradeModel', () 
=> { + it('downloads model if not in cache', async () => { + const mockModel = { + id: 'model-3-cuda-gpu:1', + alias: 'model-3', + runtime: ExecutionProvider.CUDA, + uri: 'https://example.com/model', + publisher: 'Microsoft', + provider: 'AzureFoundry', + promptTemplate: {}, + } as any + + vi.spyOn(manager, 'getLatestModelInfo').mockResolvedValue(mockModel) + const downloadSpy = vi.spyOn(manager, 'downloadModel').mockResolvedValue(mockModel) + + const result = await manager.upgradeModel('model-3') + + expect(manager.getLatestModelInfo).toHaveBeenCalledWith('model-3', true) + expect(downloadSpy).toHaveBeenCalledWith('model-3-cuda-gpu:1', undefined, false, undefined) + expect(result).toEqual(mockModel) + }) + + it('downloads latest version if older version is in cache', async () => { + const mockModel = { + id: 'model-2-npu:2', + alias: 'model-2', + runtime: ExecutionProvider.QNN, + uri: 'https://example.com/model2', + publisher: 'Microsoft', + provider: 'AzureFoundry', + promptTemplate: {}, + } as any + + vi.spyOn(manager, 'getLatestModelInfo').mockResolvedValue(mockModel) + const downloadSpy = vi.spyOn(manager, 'downloadModel').mockResolvedValue(mockModel) + + const result = await manager.upgradeModel('model-2-npu:1') + + expect(manager.getLatestModelInfo).toHaveBeenCalledWith('model-2-npu:1', true) + expect(downloadSpy).toHaveBeenCalledWith('model-2-npu:2', undefined, false, undefined) + expect(result).toEqual(mockModel) + }) + + it('does not redownload model if already latest', async () => { + const mockModel = { + id: 'model-4-generic-gpu:1', + alias: 'model-4', + runtime: ExecutionProvider.WEBGPU, + uri: 'https://example.com/model4', + publisher: 'Microsoft', + provider: 'AzureFoundry', + promptTemplate: {}, + } as any + + vi.spyOn(manager, 'getLatestModelInfo').mockResolvedValue(mockModel) + const downloadSpy = vi.spyOn(manager, 'downloadModel').mockResolvedValue(mockModel) + + const result = await manager.upgradeModel('model-4') + + 
expect(manager.getLatestModelInfo).toHaveBeenCalledWith('model-4', true) + expect(downloadSpy).toHaveBeenCalledWith('model-4-generic-gpu:1', undefined, false, undefined) + expect(result).toEqual(mockModel) + }) + + it('throws error if getLatestModelInfo fails', async () => { + vi.spyOn(manager, 'getLatestModelInfo').mockRejectedValue(new Error('Not found')) + + await expect(manager.upgradeModel('nonexistent-model')).rejects.toThrow('Not found') + }) + }) + describe('loadModel', () => { it('should load model with default TTL', async () => { // Setup model info From a21e8ec20c9d8a9732740c56ae5db977997c858d Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 21:35:44 -0700 Subject: [PATCH 10/15] update C# API to support versioning --- sdk/cs/src/FoundryLocalManager.cs | 241 ++++++++++++------ sdk/cs/src/FoundryModelInfo.cs | 6 + sdk/cs/src/Microsoft.AI.Foundry.Local.csproj | 2 +- .../FoundryLocalManagerTest.cs | 191 +++++++++++++- 4 files changed, 363 insertions(+), 77 deletions(-) diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index b71cf9a1..b1a817d0 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -171,10 +171,38 @@ public void RefreshCatalog() public async Task GetModelInfoAsync(string aliasOrModelId, CancellationToken ct = default) { - var dictionary = await GetCatalogDictAsync(ct); + var catalog = await GetCatalogDictAsync(ct); + ModelInfo? modelInfo = null; - dictionary.TryGetValue(aliasOrModelId, out ModelInfo? 
model); - return model; + // Direct match (id with version or alias) + if (catalog.TryGetValue(aliasOrModelId, out var directMatch)) + { + modelInfo = directMatch; + } + else if (!aliasOrModelId.Contains(':')) + { + // If no direct match and aliasOrModelId does not contain a version suffix + var prefix = aliasOrModelId + ":"; + var bestVersion = -1; + + foreach (var kvp in catalog) + { + var key = kvp.Key; + ModelInfo model = kvp.Value; + + if (key.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)) + { + var version = GetVersion(key); + if (version > bestVersion) + { + bestVersion = version; + modelInfo = model; + } + } + } + } + + return modelInfo; } public async Task GetCacheLocationAsync(CancellationToken ct = default) @@ -247,64 +275,49 @@ public async Task> ListCachedModelsAsync(CancellationToken ct = public async Task IsModelUpgradeableAsync(string aliasOrModelId, CancellationToken ct = default) { - var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) - ?? throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); - - await StartServiceAsync(ct); - var response = await _serviceClient!.GetAsync($"/openai/upgradeable/{modelInfo.ModelId}", ct); - response.EnsureSuccessStatusCode(); - - var jsonResponse = await response.Content.ReadAsStringAsync(ct); - using var jsonDoc = JsonDocument.Parse(jsonResponse); - return jsonDoc.RootElement.GetProperty("upgradeable").GetBoolean(); - } - - public async Task UpgradeModelAsync( - string aliasOrModelId, - string? token = null, - CancellationToken ct = default) - { - var modelInfo = await GetModelInfoAsync(aliasOrModelId, ct) - ?? 
throw new InvalidOperationException($"Model {aliasOrModelId} not found in catalog."); - - var request = new UpgradeRequest + var modelInfo = await GetLatestModelInfoAsync(aliasOrModelId, ct); + if (modelInfo == null) { - Model = new UpgradeRequest.UpgradeBody - { - Name = modelInfo.ModelId, - Uri = modelInfo.Uri, - Publisher = modelInfo.Publisher, - ProviderType = modelInfo.ProviderType == "AzureFoundry" ? "AzureFoundryLocal" : modelInfo.ProviderType, - PromptTemplate = modelInfo.PromptTemplate - }, - Token = token ?? "", - IgnorePipeReport = true - }; + return false; // Model not found in the catalog + } - var response = await _serviceClient!.PostAsJsonAsync("/openai/upgrade", request, ct); - response.EnsureSuccessStatusCode(); - var responseBody = await response.Content.ReadAsStringAsync(ct); + var latestVersion = GetVersion(modelInfo.ModelId); + if (latestVersion == -1) + { + return false; // Invalid version format in model ID + } - // Find the last '{' to get the start of the JSON object - var jsonStart = responseBody.LastIndexOf('{'); - if (jsonStart == -1) + var cachedModels = await ListCachedModelsAsync(ct); + foreach (var cachedModel in cachedModels) { - throw new InvalidOperationException("No JSON object found in response."); + if (cachedModel.ModelId.Equals(modelInfo.ModelId, StringComparison.OrdinalIgnoreCase) && + GetVersion(cachedModel.ModelId) == latestVersion) + { + // Cached model is already at latest version + return false; + } } - var jsonPart = responseBody[jsonStart..]; + // Latest version not in cache - upgrade available + return true; - // Parse the JSON part - using var jsonDoc = JsonDocument.Parse(jsonPart); - var success = jsonDoc.RootElement.GetProperty("success").GetBoolean(); - var errorMessage = jsonDoc.RootElement.GetProperty("errorMessage").GetString(); + } - if (!success) + public async Task UpgradeModelAsync(string aliasOrModelId, string? 
token = null, CancellationToken ct = default) + { + // Get the latest model info; throw if not found + var modelInfo = await GetLatestModelInfoAsync(aliasOrModelId, ct) + ?? throw new ArgumentException($"Model '{aliasOrModelId}' was not found in the catalog."); + + // Attempt to download the model + try { - throw new InvalidOperationException($"Failed to download model: {errorMessage}"); + return await DownloadModelAsync(modelInfo.ModelId, token, false, ct); + } + catch (Exception ex) + { + throw new InvalidOperationException($"Failed to upgrade model '{aliasOrModelId}'.", ex); } - - return modelInfo; } public async Task LoadModelAsync(string aliasOrModelId, TimeSpan? timeout = null, CancellationToken ct = default) @@ -497,39 +510,125 @@ private async Task> FetchModelInfosAsync(IEnumerable ali private async Task> GetCatalogDictAsync(CancellationToken ct = default) { - if (_catalogDictionary == null) + if (_catalogDictionary != null) + { + return _catalogDictionary; + } + + var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); + var models = await ListCatalogModelsAsync(ct); + foreach (var model in models) + { + dict[model.ModelId] = model; + } + + var aliasCandidates = new Dictionary>(StringComparer.OrdinalIgnoreCase); + foreach (var model in models) + { + if (!string.IsNullOrWhiteSpace(model.Alias)) + { + if (!aliasCandidates.TryGetValue(model.Alias, out var list)) + { + list = []; + aliasCandidates[model.Alias] = list; + } + list.Add(model); + } + } + + // For each alias, choose the best candidate based on _priorityMap and version + foreach (var kvp in aliasCandidates) { - var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); - var models = await ListCatalogModelsAsync(ct); - foreach (var model in models) + var alias = kvp.Key; + List candidates = kvp.Value; + + ModelInfo bestCandidate = candidates.Aggregate((best, current) => { - dict[model.ModelId] = model; + // Get priorities or max int if not found + var bestPriority = 
_priorityMap.TryGetValue(best.Runtime.ExecutionProvider, out var bp) ? bp : int.MaxValue; + var currentPriority = _priorityMap.TryGetValue(current.Runtime.ExecutionProvider, out var cp) ? cp : int.MaxValue; - if (!string.IsNullOrWhiteSpace(model.Alias)) + if (currentPriority < bestPriority) { - if (!dict.TryGetValue(model.Alias, out var existing)) - { - dict[model.Alias] = model; - } - else - { - var currentPriority = _priorityMap.TryGetValue(model.Runtime.ExecutionProvider, out var cp) ? cp : int.MaxValue; - var existingPriority = _priorityMap.TryGetValue(existing.Runtime.ExecutionProvider, out var ep) ? ep : int.MaxValue; + return current; + } - if (currentPriority < existingPriority) - { - dict[model.Alias] = model; - } + if (currentPriority == bestPriority) + { + var bestVersion = GetVersion(best.ModelId); + var currentVersion = GetVersion(current.ModelId); + if (currentVersion > bestVersion) + { + return current; } } - } - _catalogDictionary = dict; + return best; + }); + + dict[alias] = bestCandidate; } + _catalogDictionary = dict; return _catalogDictionary; } + public async Task GetLatestModelInfoAsync(string aliasOrModelId, CancellationToken ct = default) + { + if (string.IsNullOrEmpty(aliasOrModelId)) + { + return null; + } + + var catalog = await GetCatalogDictAsync(ct); + + // If alias or id without version + if (!aliasOrModelId.Contains(':')) + { + // If exact match in catalog, return it directly + if (catalog.TryGetValue(aliasOrModelId, out var model)) + { + return model; + } + + // Otherwise, GetModelInfoAsync will get the latest version + return await GetModelInfoAsync(aliasOrModelId, ct); + } + else + { + // If ID with version, strip version and use GetModelInfoAsync to get the latest version + var idWithoutVersion = aliasOrModelId.Split(':')[0]; + return await GetModelInfoAsync(idWithoutVersion, ct); + } + } + + /// + /// Extracts the numeric version from a model ID string (e.g. "model-x:3" → 3). + /// + /// The model ID string. 
+ /// The numeric version, or -1 if not found. + public static int GetVersion(string modelId) + { + if (string.IsNullOrEmpty(modelId)) + { + return -1; + } + + var parts = modelId.Split(':'); + if (parts.Length == 0) + { + return -1; + } + + var versionPart = parts[^1]; // last element + if (int.TryParse(versionPart, out var version)) + { + return version; + } + + return -1; + } + private static async Task EnsureServiceRunning(CancellationToken ct = default) { var startInfo = new ProcessStartInfo diff --git a/sdk/cs/src/FoundryModelInfo.cs b/sdk/cs/src/FoundryModelInfo.cs index 15b537ba..4cd752e5 100644 --- a/sdk/cs/src/FoundryModelInfo.cs +++ b/sdk/cs/src/FoundryModelInfo.cs @@ -99,6 +99,12 @@ public record ModelInfo [JsonPropertyName("parentModelUri")] public string ParentModelUri { get; init; } = default!; + + [JsonPropertyName("maxOutputTokens")] + public long MaxOutputTokens { get; init; } + + [JsonPropertyName("minFLVersion")] + public string MinFLVersion { get; init; } = default!; } internal sealed class DownloadRequest diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index f9478889..75753c59 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -14,7 +14,7 @@ enable enable True - 0.1.0 + 0.2.0 diff --git a/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs b/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs index 6fc380e3..84658302 100644 --- a/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs +++ b/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs @@ -131,7 +131,7 @@ public async Task GetModelInfoAsync_ReturnsModel_WhenModelExists() public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() { // GIVEN - var phi4MiniGenericCpuModelId = "Phi-4-mini-instruct-generic-cpu"; + var phi4MiniGenericCpuModelId = "Phi-4-mini-instruct-generic-cpu:1"; var phi4MiniAlias = "phi-4-mini"; var phi4MiniGenericCpuModel 
= new ModelInfo { @@ -146,7 +146,7 @@ public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() } }; - var phi4MiniWebGpuModelId = "Phi-4-mini-instruct-webgpu"; + var phi4MiniWebGpuModelId = "Phi-4-mini-instruct-webgpu:1"; var phi4MiniWebGpuModel = new ModelInfo { ModelId = phi4MiniWebGpuModelId, @@ -160,7 +160,7 @@ public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() } }; - var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu"; + var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu:1"; var phi4MiniCudaModel = new ModelInfo { ModelId = phi4MiniCudaModelId, @@ -202,7 +202,7 @@ public async Task GetModelInfoAsync_CudaHigherPriorityThanCpuAndWebgpu() public async Task GetModelInfoAsync_QnnHigherPriorityThanCuda() { // GIVEN - var phi4MiniQnnModelId = "Phi-4-mini-instruct-qnn"; + var phi4MiniQnnModelId = "Phi-4-mini-instruct-qnn:1"; var phi4MiniAlias = "phi-4-mini"; var phi4MiniQnnModel = new ModelInfo { @@ -217,7 +217,7 @@ public async Task GetModelInfoAsync_QnnHigherPriorityThanCuda() } }; - var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu"; + var phi4MiniCudaModelId = "Phi-4-mini-instruct-cuda-gpu:1"; var phi4MiniCudaModel = new ModelInfo { ModelId = phi4MiniCudaModelId, @@ -688,6 +688,187 @@ public async Task DownloadModelWithProgressAsync_DownloadErrorProvidesErrorProgr Assert.Equal("Download error occurred.", p.ErrorMessage); } + [Fact] + public async Task UpgradeModelAsync_Success_ReturnsDownloadedModel() + { + var alias = "model-1"; + var modelId = "model-1:2"; + var token = "token"; + + // Mock /foundry/list for catalog models + var catalogModels = new List + { + new() { + ModelId = modelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + 
.Respond("application/json", catalogJson); + + var downloadResponseJson = "{\"success\": true, \"errorMessage\": null}"; + _mockHttp.When(HttpMethod.Post, "/openai/download") + .Respond("application/json", downloadResponseJson); + + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", "[]"); + + // Act + var result = await _manager.UpgradeModelAsync(alias, token); + + // Assert + Assert.NotNull(result); + Assert.Equal(modelId, result.ModelId); + } + + [Fact] + public async Task UpgradeModelAsync_ModelNotFound_ThrowsArgumentException() + { + var alias = "missing-model"; + + // Mock /foundry/list to return an empty list (no models) + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", "[]"); + + // Mock /openai/models for cached models (empty) + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", "[]"); + + // We don't expect download to be called, but mock anyway if needed + _mockHttp.When(HttpMethod.Post, "/openai/download") + .Respond("application/json", "{\"success\": true, \"errorMessage\": null}"); + + // Act & Assert + var ex = await Assert.ThrowsAsync(() => _manager.UpgradeModelAsync(alias)); + Assert.Contains("not found", ex.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + + public async Task UpgradeModelAsync_DownloadReturnsNull_ThrowsInvalidOperationException() + { + var alias = "model-1"; + var modelId = "model-1:2"; + + // Mock /foundry/list to return the model info (so it's found) + var catalogModels = new List + { + new() + { + ModelId = modelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", catalogJson); + + // Mock /openai/models (empty cached) + 
_mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", "[]"); + + // Mock /openai/download to simulate failure (success: false) + var failedDownloadResponseJson = "{\"success\": false, \"errorMessage\": \"Simulated download failure.\"}"; + _mockHttp.When(HttpMethod.Post, "/openai/download") + .Respond("application/json", failedDownloadResponseJson); + + // Act & Assert + var ex = await Assert.ThrowsAsync(() => _manager.UpgradeModelAsync(alias)); + Assert.Contains("failed to upgrade", ex.Message, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task IsModelUpgradeableAsync_ReturnsTrue_WhenNewerVersionAvailable() + { + var alias = "model-1"; + var latestModelId = "model-1:2"; + + // Catalog contains the latest model version + var catalogModels = new List + { + new() + { + ModelId = latestModelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", catalogJson); + + // Cached models contain an older version + var cachedModels = new[] { "model-1:1" }; + var cachedModelsJson = JsonSerializer.Serialize(cachedModels); + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", cachedModelsJson); + + // Act + var result = await _manager.IsModelUpgradeableAsync(alias); + + // Assert + Assert.True(result); + } + + [Fact] + public async Task IsModelUpgradeableAsync_ReturnsFalse_WhenAlreadyLatestVersionCached() + { + var alias = "model-1"; + var latestModelId = "model-1:2"; + + // Catalog with latest version + var catalogModels = new List + { + new() + { + ModelId = latestModelId, + Alias = alias, + Uri = "http://model.uri", + ProviderType = "openai", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = 
ExecutionProvider.CPUExecutionProvider } + } + }; + var catalogJson = JsonSerializer.Serialize(catalogModels); + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", catalogJson); + + // Cached model is already at the latest version + var cachedModels = new[] { latestModelId }; + var cachedModelsJson = JsonSerializer.Serialize(cachedModels); + _mockHttp.When(HttpMethod.Get, "/openai/models") + .Respond("application/json", cachedModelsJson); + + // Act + var result = await _manager.IsModelUpgradeableAsync(alias); + + // Assert + Assert.False(result); + } + + [Fact] + public async Task IsModelUpgradeableAsync_ReturnsFalse_WhenModelNotFoundInCatalog() + { + var alias = "missing-model"; + + _mockHttp.When(HttpMethod.Get, "/foundry/list") + .Respond("application/json", "[]"); + + // Act + var result = await _manager.IsModelUpgradeableAsync(alias); + + // Assert + Assert.False(result); + } + public void Dispose() { _client.Dispose(); From 6477a9f39110b5f82bf3ce34f8c93c01e163d29d Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 23:10:19 -0700 Subject: [PATCH 11/15] update Rust API to support versioning --- sdk/rust/Cargo.toml | 2 +- sdk/rust/src/api.rs | 240 +++++++++++++++++++-------------- sdk/rust/src/client.rs | 2 +- sdk/rust/src/lib.rs | 10 +- sdk/rust/src/models.rs | 4 + sdk/rust/tests/mock_service.rs | 31 +++-- sdk/rust/tests/test_api.rs | 96 ++++++++++--- 7 files changed, 253 insertions(+), 132 deletions(-) diff --git a/sdk/rust/Cargo.toml b/sdk/rust/Cargo.toml index 3fb31e01..0fffaa0b 100644 --- a/sdk/rust/Cargo.toml +++ b/sdk/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "foundry-local" -version = "0.1.0" +version = "0.2.0" edition = "2021" description = "SDK for Microsoft Foundry Local service" license = "MIT" diff --git a/sdk/rust/src/api.rs b/sdk/rust/src/api.rs index 0adabd29..446c4186 100644 --- a/sdk/rust/src/api.rs +++ b/sdk/rust/src/api.rs @@ -83,6 +83,18 @@ impl FoundryLocalManagerBuilder { } } +/// Extract 
numeric version from ID (e.g. model-x:3 → 3) +/// +/// # Returns +/// +/// Numeric version as i32, or -1 if not found. +fn get_version(model_id: &str) -> i32 { + match model_id.rsplit(':').next() { + Some(version_str) => version_str.parse::().unwrap_or(-1), + None => -1, + } +} + impl FoundryLocalManager { /// Create a new builder for FoundryLocalManager. pub fn builder() -> FoundryLocalManagerBuilder { @@ -194,28 +206,23 @@ impl FoundryLocalManager { /// /// Dictionary of catalog models. async fn get_catalog_dict(&mut self) -> Result<&HashMap> { - if self.catalog_dict.is_some() { - return Ok(self.catalog_dict.as_ref().unwrap()); + if let Some(ref dict) = self.catalog_dict { + return Ok(dict); } let catalog_models = self.list_catalog_models().await?; let mut catalog_dict = HashMap::new(); - let mut alias_candidates: HashMap> = HashMap::new(); + let mut alias_candidates: HashMap> = HashMap::new(); - // Create dictionary of models by ID - for model in catalog_models { + for model in catalog_models.iter() { catalog_dict.insert(model.id.clone(), model.clone()); - } - - // Group models by alias - for model in catalog_models { alias_candidates .entry(model.alias.clone()) .or_default() - .push(model); + .push(model.clone()); } - // Define the preferred order of execution providers + // Set up priority order let mut preferred_order = vec![ ExecutionProvider::QNN, ExecutionProvider::CUDA, @@ -224,26 +231,33 @@ impl FoundryLocalManager { ]; if cfg!(not(target_os = "windows")) { - // Adjust order for non-Windows platforms - preferred_order.retain(|p| !matches!(p, ExecutionProvider::CPU)); + // Move CPU to the end for non-Windows + preferred_order.retain(|p| *p != ExecutionProvider::CPU); preferred_order.push(ExecutionProvider::CPU); } - let priority_map: HashMap<_, _> = preferred_order + let priority_map: HashMap = preferred_order .into_iter() .enumerate() .map(|(i, provider)| (provider, i)) .collect(); - // Choose the preferred model for each alias + // Choose the best 
candidate per alias for (alias, candidates) in alias_candidates { - if let Some(preferred) = candidates.into_iter().min_by_key(|model| { - priority_map - .get(&model.runtime) - .copied() - .unwrap_or(usize::MAX) + if let Some(best) = candidates.into_iter().max_by(|a, b| { + // Compare priority (lower index = higher priority) + let pa = priority_map.get(&a.runtime).copied().unwrap_or(usize::MAX); + let pb = priority_map.get(&b.runtime).copied().unwrap_or(usize::MAX); + match pa.cmp(&pb) { + std::cmp::Ordering::Less => std::cmp::Ordering::Greater, + std::cmp::Ordering::Greater => std::cmp::Ordering::Less, + std::cmp::Ordering::Equal => { + // Same priority → compare version + get_version(&a.id).cmp(&get_version(&b.id)) + } + } }) { - catalog_dict.insert(alias, preferred.clone()); + catalog_dict.insert(alias, best); } } @@ -274,16 +288,80 @@ impl FoundryLocalManager { ) -> Result { let catalog_dict = self.get_catalog_dict().await?; - match catalog_dict.get(alias_or_model_id) { - Some(model) => Ok(model.clone()), - None if raise_on_not_found => Err(anyhow!( - "Model {} not found in the catalog", - alias_or_model_id - )), - None => Err(anyhow!( - "Model {} not found in the catalog", - alias_or_model_id - )), + // Try to find exact match (alias or id) + if let Some(model_info) = catalog_dict.get(alias_or_model_id) { + return Ok(model_info.clone()); + } + + // If alias_or_model_id does NOT contain version suffix (":") + if !alias_or_model_id.contains(':') { + // Search for model IDs starting with alias_or_model_id + ":" + let prefix = format!("{}:", alias_or_model_id); + let mut best_version = -1; + let mut best_model: Option = None; + + for (key, info) in catalog_dict.iter() { + if key.starts_with(&prefix) { + let version = get_version(key); + if version > best_version { + best_version = version; + best_model = Some(info.clone()); + } + } + } + + if let Some(model) = best_model { + return Ok(model); + } + } + + if raise_on_not_found { + Err(anyhow!("Model {} not found in 
the catalog", alias_or_model_id)) + } else { + Err(anyhow!("Model {} not found in the catalog", alias_or_model_id)) + } + } + + /// Get the latest model information by alias or model ID. + /// The difference from get_model_info is that this method will return the latest version of the model + /// even when you pass it a model id that contains a version suffix. + /// + /// # Arguments + /// + /// * `alias_or_model_id` - Alias or Model ID. If it is an alias, the most preferred model will be returned. + /// * `raise_on_not_found` - If true, raise an error if the model is not found. Default is false. + /// + /// # Returns + /// + /// Latest model information, or None if not found and raise_on_not_found is false. + pub async fn get_latest_model_info( + &mut self, + alias_or_model_id: &str, + raise_on_not_found: bool, + ) -> Result { + if alias_or_model_id.is_empty() { + if raise_on_not_found { + return Err(anyhow!("The provided model alias or ID was empty.")); + } else { + return Err(anyhow!("Model alias or ID was empty and raise_on_not_found is false.")); + } + } + + let catalog = self.get_catalog_dict().await?; + + if !alias_or_model_id.contains(':') { + // Alias or ID without version suffix + if let Some(model) = catalog.get(alias_or_model_id) { + // Found directly in catalog (alias or id) + Ok(model.clone()) + } else { + // Fallback: try get_model_info which can look for latest version of ID without version + self.get_model_info(alias_or_model_id, raise_on_not_found).await + } + } else { + // ID with version suffix: strip version and get latest by ID without version + let id_without_version = alias_or_model_id.split(':').next().unwrap_or(""); + self.get_model_info(id_without_version, raise_on_not_found).await } } @@ -397,99 +475,65 @@ impl FoundryLocalManager { Ok(model_info) } - /// Checks if a newer version of a model is available. + /// Check if a newer version of a model is available. 
/// /// # Arguments /// - /// * `alias_or_model_id` - The alias or model ID to check for upgrades. + /// * `alias_or_model_id` - Alias or Model ID. /// /// # Returns /// - /// True if a newer version is available, otherwise false. + /// bool: True if a newer version is available, False otherwise. pub async fn is_model_upgradeable( &mut self, - alias_or_model_id: &str + alias_or_model_id: &str, ) -> Result { - let model_info = self.get_model_info(alias_or_model_id, true).await?; - let url = format!("/openai/upgradeable/{}", model_info.id); + info!("Checking if model '{}' is upgradeable", alias_or_model_id); - let client = self.client()?; - let response: Option = client.get(&url, None).await?; + // Get the latest model info (throws if not found) + let latest_model_info = self + .get_latest_model_info(alias_or_model_id, true) + .await?; + + let latest_version = get_version(&latest_model_info.id); + if latest_version == -1 { + return Ok(false); // Invalid version format + } - let data = response.ok_or_else(|| anyhow!("Failed to check model upgrade availability"))?; - data["upgradeable"] - .as_bool() - .ok_or_else(|| anyhow!("Invalid upgrade response format")) + let cached_models = self.list_cached_models().await?; + for cached_model in cached_models { + if cached_model.id == latest_model_info.id + && get_version(&cached_model.id) == latest_version + { + return Ok(false); // Already cached latest version + } + } + + Ok(true) // Latest version not in cache } - /// Upgrades a model to its latest version. + /// Downloads the latest version of a model to the local cache, if it is not already cached. /// /// # Arguments /// /// * `alias_or_model_id` - The alias or model ID to upgrade. - /// * `token` - Optional token for authentication. + /// * `token` - Optional authentication token. /// - /// # Returns + /// # Errors /// - /// The upgraded model information. + /// Downloaded model information. 
pub async fn upgrade_model( &mut self, alias_or_model_id: &str, token: Option<&str>, ) -> Result { - let model_info = self.get_model_info(alias_or_model_id, true).await?; - info!( - "Upgrading model: {} ({})", - model_info.alias, model_info.id - ); - - // Create the upgrade body similar to the JS implementation - let mut body = serde_json::json!({ - "model": { - "Name": model_info.id, - "Uri": model_info.uri, - "Publisher": model_info.publisher, - "ProviderType": if model_info.provider == "AzureFoundry" { - "AzureFoundryLocal" - } else { - model_info.provider - }, - "PromptTemplate": model_info.prompt_template, - }, - "IgnorePipeReport": true - }); - - // Add token if provided - if let Some(t) = token { - body["token"] = Value::String(t.to_string()); - } - - let client = self.client()?; - let response: Value = client - .post_with_progress("/openai/upgrade", Some(body)) + // Get the latest model info (this also validates existence) + let model_info = self + .get_latest_model_info(alias_or_model_id, true) .await?; - // Check if the upgrade was successful - if !response["success"].as_bool().unwrap_or(false) { - let error_msg = response["error"] - .as_str() - .unwrap_or("Unknown error"); - - return Err(anyhow!( - "Failed to upgrade model with alias '{}' and ID '{}': {}", - model_info.alias, - model_info.id, - error_msg - )); - } - - // Refresh the model cache to get the latest information - self.refresh_catalog(); - - // Get the updated model information - let updated_model_info = self.get_model_info(&model_info.id, true).await?; - - Ok(updated_model_info) + // Download the model and discard the result + self.download_model(&model_info.id, token, false).await } /// Load a model. diff --git a/sdk/rust/src/client.rs b/sdk/rust/src/client.rs index fb964aaf..8779d4f4 100644 --- a/sdk/rust/src/client.rs +++ b/sdk/rust/src/client.rs @@ -46,7 +46,7 @@ impl HttpClient { /// A new HttpClient instance. 
pub fn new(host: &str, timeout_secs: Option) -> Self { let timeout = timeout_secs.map(Duration::from_secs); - let mut client_builder = Client::builder().user_agent("foundry-local-rust-sdk/0.1.0"); + let mut client_builder = Client::builder().user_agent("foundry-local-rust-sdk/0.2.0"); if let Some(timeout) = timeout { client_builder = client_builder.timeout(timeout); diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs index 6cec3ea2..a563c980 100644 --- a/sdk/rust/src/lib.rs +++ b/sdk/rust/src/lib.rs @@ -12,7 +12,7 @@ //! //! ## Example //! -//! ```rust +//! ```rust, ignore //! use foundry_local::FoundryLocalManager; //! use anyhow::Result; //! @@ -37,8 +37,12 @@ //! .await?; //! //! let result = response.json::().await?; -//! println!("{}", result["choices"][0]["message"]["content"]); -//! +//! if let Some(content) = result["choices"][0]["message"]["content"].as_str() { +//! println!("{}", content); +//! } else { +//! println!("No content found in response."); +//! } +//! //! Ok(()) //! } //! ``` diff --git a/sdk/rust/src/models.rs b/sdk/rust/src/models.rs index a6ed2194..b6aa6fe6 100644 --- a/sdk/rust/src/models.rs +++ b/sdk/rust/src/models.rs @@ -98,6 +98,10 @@ pub struct FoundryListResponseModel { pub license_description: String, #[serde(rename = "parentModelUri")] pub parent_model_uri: String, + #[serde(rename = "maxOutputTokens")] + pub max_output_tokens: i32, + #[serde(rename = "minFLVersion")] + pub min_fl_version: String, } /// Model information. 
diff --git a/sdk/rust/tests/mock_service.rs b/sdk/rust/tests/mock_service.rs index d36fdb7a..e7233b1d 100644 --- a/sdk/rust/tests/mock_service.rs +++ b/sdk/rust/tests/mock_service.rs @@ -26,11 +26,11 @@ impl Default for MockState { Self { catalog_models: vec![ FoundryModelInfo { - id: "mock-model-1".to_string(), - alias: "mock-small".to_string(), + id: "Phi-4-mini-instruct-generic-cpu:1".to_string(), + alias: "phi-4-mini".to_string(), runtime: ExecutionProvider::CPU, file_size_mb: 100, - uri: "https://mock-uri/model1".to_string(), + uri: "azureml://registries/azureml/models/Phi-4-mini-instruct-generic-cpu/versions/1".to_string(), version: "1.0".to_string(), prompt_template: serde_json::json!({}), provider: "MockProvider".to_string(), @@ -39,11 +39,11 @@ impl Default for MockState { task: "text-generation".to_string(), }, FoundryModelInfo { - id: "mock-model-2".to_string(), - alias: "mock-medium".to_string(), + id: "qwen2.5-0.5b-instruct-cuda-gpu:1".to_string(), + alias: "qwen2.5-0.5b".to_string(), runtime: ExecutionProvider::CUDA, file_size_mb: 500, - uri: "https://mock-uri/model2".to_string(), + uri: "azureml://registries/azureml/models/qwen2.5-0.5b-instruct-cuda-gpu/versions/1".to_string(), version: "1.0".to_string(), prompt_template: serde_json::json!({}), provider: "MockProvider".to_string(), @@ -51,8 +51,21 @@ impl Default for MockState { license: "MIT".to_string(), task: "text-generation".to_string(), }, + FoundryModelInfo { + id: "qwen2.5-0.5b-instruct-cuda-gpu:2".to_string(), + alias: "qwen2.5-0.5b".to_string(), + runtime: ExecutionProvider::CUDA, + file_size_mb: 600, + uri: "azureml://registries/azureml/models/qwen2.5-0.5b-instruct-cuda-gpu/versions/2".to_string(), + version: "2.0".to_string(), + prompt_template: serde_json::json!({}), + provider: "MockProvider".to_string(), + publisher: "MockPublisher".to_string(), + license: "MIT".to_string(), + task: "text-generation".to_string(), + }, ], - cached_models: vec!["mock-model-1".to_string()], + 
cached_models: vec!["qwen2.5-0.5b-instruct-cuda-gpu:1".to_string()], loaded_models: vec![], cache_location: "/tmp/mock-cache".to_string(), } @@ -86,7 +99,9 @@ async fn list_catalog(State(state): State) -> impl IntoResponse { "supportsToolCalling": false, "license": model.license, "licenseDescription": "", - "parentModelUri": "" + "parentModelUri": "", + "maxOutputTokens": 1024, + "minFLVersion": "1.0.0" }) }) .collect::>(); diff --git a/sdk/rust/tests/test_api.rs b/sdk/rust/tests/test_api.rs index db54d81c..bfd7137a 100644 --- a/sdk/rust/tests/test_api.rs +++ b/sdk/rust/tests/test_api.rs @@ -29,13 +29,16 @@ async fn test_list_catalog_models() { let catalog_models = manager.list_catalog_models().await.unwrap(); // Verify the result - assert_eq!(catalog_models.len(), 2); - assert_eq!(catalog_models[0].id, "mock-model-1"); - assert_eq!(catalog_models[0].alias, "mock-small"); + assert_eq!(catalog_models.len(), 3); + assert_eq!(catalog_models[0].id, "Phi-4-mini-instruct-generic-cpu:1"); + assert_eq!(catalog_models[0].alias, "phi-4-mini"); assert_eq!(catalog_models[0].runtime, ExecutionProvider::CPU); - assert_eq!(catalog_models[1].id, "mock-model-2"); - assert_eq!(catalog_models[1].alias, "mock-medium"); + assert_eq!(catalog_models[1].id, "qwen2.5-0.5b-instruct-cuda-gpu:1"); + assert_eq!(catalog_models[1].alias, "qwen2.5-0.5b"); assert_eq!(catalog_models[1].runtime, ExecutionProvider::CUDA); + assert_eq!(catalog_models[2].id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); + assert_eq!(catalog_models[2].alias, "qwen2.5-0.5b"); + assert_eq!(catalog_models[2].runtime, ExecutionProvider::CUDA); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -50,14 +53,14 @@ async fn test_get_model_info() { let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; // Test getting model info by ID - let model_info = manager.get_model_info("mock-model-1", false).await.unwrap(); - assert_eq!(model_info.id, "mock-model-1"); - assert_eq!(model_info.alias, "mock-small"); + let 
model_info = manager.get_model_info("Phi-4-mini-instruct-generic-cpu:1", false).await.unwrap(); + assert_eq!(model_info.id, "Phi-4-mini-instruct-generic-cpu:1"); + assert_eq!(model_info.alias, "phi-4-mini"); // Test getting model info by alias - let model_info = manager.get_model_info("mock-small", false).await.unwrap(); - assert_eq!(model_info.id, "mock-model-1"); - assert_eq!(model_info.alias, "mock-small"); + let model_info = manager.get_model_info("qwen2.5-0.5b", false).await.unwrap(); + assert_eq!(model_info.id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); + assert_eq!(model_info.alias, "qwen2.5-0.5b"); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -90,7 +93,7 @@ async fn test_list_cached_models() { // Test listing cached models let cached_models = manager.list_cached_models().await.unwrap(); assert_eq!(cached_models.len(), 1); - assert_eq!(cached_models[0].id, "mock-model-1"); + assert_eq!(cached_models[0].id, "qwen2.5-0.5b-instruct-cuda-gpu:1"); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -106,21 +109,72 @@ async fn test_download_model() { // Test downloading a model let model_info = manager - .download_model("mock-model-2", None, false) + .download_model("qwen2.5-0.5b", None, false) .await .unwrap(); - assert_eq!(model_info.id, "mock-model-2"); + assert_eq!(model_info.id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); - // Verify the model is now cached + // Verify latest version of the model is now also cached let cached_models = manager.list_cached_models().await.unwrap(); assert_eq!(cached_models.len(), 2); - assert!(cached_models.iter().any(|m| m.id == "mock-model-1")); - assert!(cached_models.iter().any(|m| m.id == "mock-model-2")); + assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:1")); + assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:2")); // Shutdown the mock server shutdown_tx.send(()).unwrap(); } +#[tokio::test] +async fn test_is_model_upgradeable() { + // Start the mock 
server + let (server_uri, shutdown_tx) = start_mock_server().await; + + // Create a manager with the mock server URI + let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; + + // When no version is in the cache + let is_upgradeable = manager.is_model_upgradeable("phi-4-mini").await.unwrap(); + assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + + // When the latest version is not in the cache + let is_upgradeable = manager.is_model_upgradeable("qwen2.5-0.5b").await.unwrap(); + assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + + // Shutdown the mock server + shutdown_tx.send(()).unwrap(); +} + +#[tokio::test] +async fn test_upgrade_model_success() { + let (server_uri, shutdown_tx) = start_mock_server().await; + let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; + + // This should trigger download of latest model "qwen2.5-0.5b-instruct-cuda-gpu:2" + let model_info = manager.upgrade_model("qwen2.5-0.5b", None).await.unwrap(); + + // Assert returned model info is correct (example) + assert_eq!(model_info.id, "qwen2.5-0.5b-instruct-cuda-gpu:2"); + + shutdown_tx.send(()).unwrap(); +} + +#[tokio::test] +async fn test_upgrade_model_not_found() { + let (server_uri, shutdown_tx) = start_mock_server().await; + let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; + + // Call upgrade_model without unwrap to get Result + let result = manager.upgrade_model("non-existent-model", None).await; + + // Assert it returned an error + assert!(result.is_err()); + + let err_msg = format!("{}", result.unwrap_err()); + assert!(err_msg.contains("not found")); + + shutdown_tx.send(()).unwrap(); +} + #[tokio::test] async fn test_load_and_unload_model() { // Start the mock server @@ -130,16 +184,16 @@ async fn test_load_and_unload_model() { let mut manager = FoundryLocalManager::with_test_uri(&server_uri).await; // Test loading a model - let model_info = 
manager.load_model("mock-model-1", Some(300)).await.unwrap(); - assert_eq!(model_info.id, "mock-model-1"); + let model_info = manager.load_model("phi-4-mini", Some(300)).await.unwrap(); + assert_eq!(model_info.id, "Phi-4-mini-instruct-generic-cpu:1"); // Verify the model is loaded let loaded_models = manager.list_loaded_models().await.unwrap(); assert_eq!(loaded_models.len(), 1); - assert_eq!(loaded_models[0].id, "mock-model-1"); + assert_eq!(loaded_models[0].id, "Phi-4-mini-instruct-generic-cpu:1"); // Test unloading the model - manager.unload_model("mock-model-1", false).await.unwrap(); + manager.unload_model("phi-4-mini", false).await.unwrap(); // Verify the model is unloaded let loaded_models = manager.list_loaded_models().await.unwrap(); From 603d8f002ee121b3b471802b6e85dda183159eec Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Sun, 27 Jul 2025 23:16:31 -0700 Subject: [PATCH 12/15] cargo format --- sdk/rust/src/api.rs | 33 ++++++++++++++++++--------------- sdk/rust/tests/test_api.rs | 23 ++++++++++++++++++----- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/sdk/rust/src/api.rs b/sdk/rust/src/api.rs index 446c4186..a2762384 100644 --- a/sdk/rust/src/api.rs +++ b/sdk/rust/src/api.rs @@ -316,9 +316,15 @@ impl FoundryLocalManager { } if raise_on_not_found { - Err(anyhow!("Model {} not found in the catalog", alias_or_model_id)) + Err(anyhow!( + "Model {} not found in the catalog", + alias_or_model_id + )) } else { - Err(anyhow!("Model {} not found in the catalog", alias_or_model_id)) + Err(anyhow!( + "Model {} not found in the catalog", + alias_or_model_id + )) } } @@ -343,7 +349,9 @@ impl FoundryLocalManager { if raise_on_not_found { return Err(anyhow!("The provided model alias or ID was empty.")); } else { - return Err(anyhow!("Model alias or ID was empty and raise_on_not_found is false.")); + return Err(anyhow!( + "Model alias or ID was empty and raise_on_not_found is false." 
+ )); } } @@ -356,12 +364,14 @@ impl FoundryLocalManager { Ok(model.clone()) } else { // Fallback: try get_model_info which can look for latest version of ID without version - self.get_model_info(alias_or_model_id, raise_on_not_found).await + self.get_model_info(alias_or_model_id, raise_on_not_found) + .await } } else { // ID with version suffix: strip version and get latest by ID without version let id_without_version = alias_or_model_id.split(':').next().unwrap_or(""); - self.get_model_info(id_without_version, raise_on_not_found).await + self.get_model_info(id_without_version, raise_on_not_found) + .await } } @@ -484,16 +494,11 @@ impl FoundryLocalManager { /// # Returns /// /// bool: True if a newer version is available, False otherwise. - pub async fn is_model_upgradeable( - &mut self, - alias_or_model_id: &str, - ) -> Result { + pub async fn is_model_upgradeable(&mut self, alias_or_model_id: &str) -> Result { info!("Checking if model '{}' is upgradeable", alias_or_model_id); // Get the latest model info (throws if not found) - let latest_model_info = self - .get_latest_model_info(alias_or_model_id, true) - .await?; + let latest_model_info = self.get_latest_model_info(alias_or_model_id, true).await?; let latest_version = get_version(&latest_model_info.id); if latest_version == -1 { @@ -528,9 +533,7 @@ impl FoundryLocalManager { token: Option<&str>, ) -> Result { // Get the latest model info (this also validates existence) - let model_info = self - .get_latest_model_info(alias_or_model_id, true) - .await?; + let model_info = self.get_latest_model_info(alias_or_model_id, true).await?; // Download the model and discard the result self.download_model(&model_info.id, token, false).await diff --git a/sdk/rust/tests/test_api.rs b/sdk/rust/tests/test_api.rs index bfd7137a..843d14df 100644 --- a/sdk/rust/tests/test_api.rs +++ b/sdk/rust/tests/test_api.rs @@ -53,7 +53,10 @@ async fn test_get_model_info() { let mut manager = 
FoundryLocalManager::with_test_uri(&server_uri).await; // Test getting model info by ID - let model_info = manager.get_model_info("Phi-4-mini-instruct-generic-cpu:1", false).await.unwrap(); + let model_info = manager + .get_model_info("Phi-4-mini-instruct-generic-cpu:1", false) + .await + .unwrap(); assert_eq!(model_info.id, "Phi-4-mini-instruct-generic-cpu:1"); assert_eq!(model_info.alias, "phi-4-mini"); @@ -117,8 +120,12 @@ async fn test_download_model() { // Verify latest version of the model is now also cached let cached_models = manager.list_cached_models().await.unwrap(); assert_eq!(cached_models.len(), 2); - assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:1")); - assert!(cached_models.iter().any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:2")); + assert!(cached_models + .iter() + .any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:1")); + assert!(cached_models + .iter() + .any(|m| m.id == "qwen2.5-0.5b-instruct-cuda-gpu:2")); // Shutdown the mock server shutdown_tx.send(()).unwrap(); @@ -134,11 +141,17 @@ async fn test_is_model_upgradeable() { // When no version is in the cache let is_upgradeable = manager.is_model_upgradeable("phi-4-mini").await.unwrap(); - assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + assert!( + is_upgradeable, + "Expected upgradeable because latest version is not cached" + ); // When the latest version is not in the cache let is_upgradeable = manager.is_model_upgradeable("qwen2.5-0.5b").await.unwrap(); - assert!(is_upgradeable, "Expected upgradeable because latest version is not cached"); + assert!( + is_upgradeable, + "Expected upgradeable because latest version is not cached" + ); // Shutdown the mock server shutdown_tx.send(()).unwrap(); From 9b20fb40e07de9a5e5d0444f2d8d688c0ad0d8d6 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Mon, 28 Jul 2025 16:56:46 -0700 Subject: [PATCH 13/15] update hello-foundry-local sample, make new Response properties optional --- 
samples/python/hello-foundry-local/src/app.py | 2 +- sdk/python/foundry_local/models.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py index dd43a2a7..fb78b724 100644 --- a/samples/python/hello-foundry-local/src/app.py +++ b/samples/python/hello-foundry-local/src/app.py @@ -6,7 +6,7 @@ # By using an alias, the most suitable model will be downloaded # to your end-user's device. -alias = "phi-3.5-mini" +alias = "qwen2.5-coder-0.5b-instruct-generic-cpu:3" # Create a FoundryLocalManager instance. This will start the Foundry # Local service if it is not already running and load the specified model. diff --git a/sdk/python/foundry_local/models.py b/sdk/python/foundry_local/models.py index c4018c07..eb3847c6 100644 --- a/sdk/python/foundry_local/models.py +++ b/sdk/python/foundry_local/models.py @@ -7,6 +7,7 @@ import sys from pydantic import BaseModel, Field +from typing import Optional if sys.version_info >= (3, 11): from enum import StrEnum @@ -78,8 +79,8 @@ class FoundryListResponseModel(BaseModel): license: str = Field(..., description="License of the model") licenseDescription: str = Field(..., description="License description of the model") parentModelUri: str = Field(..., description="Parent model URI of the model") - maxOutputTokens: int = Field(..., description="Maximum output tokens for the model") - minFLVersion: str = Field(..., description="Minimum Foundry Local version required for the model") + maxOutputTokens: Optional[int] = Field(..., description="Maximum output tokens for the model") + minFLVersion: Optional[str] = Field(..., description="Minimum Foundry Local version required for the model") class FoundryModelInfo(BaseModel): From 35cadcb0ec11757e78e58f019d9c647d62e4d2db Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Mon, 28 Jul 2025 17:08:15 -0700 Subject: [PATCH 14/15] Update model used in js/hello-foundry-local --- 
samples/js/hello-foundry-local/src/app.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/js/hello-foundry-local/src/app.js b/samples/js/hello-foundry-local/src/app.js index 6dc41a31..2fdae5d5 100644 --- a/samples/js/hello-foundry-local/src/app.js +++ b/samples/js/hello-foundry-local/src/app.js @@ -8,7 +8,7 @@ import { FoundryLocalManager } from "foundry-local-sdk"; // to your end-user's device. // TIP: You can find a list of available models by running the // following command in your terminal: `foundry model list`. -const alias = "phi-3.5-mini"; +const alias = "qwen2.5-coder-0.5b-instruct-generic-cpu:3"; // Create a FoundryLocalManager instance. This will start the Foundry // Local service if it is not already running. From 155ba3a49a1498dd78bc2a362423135c67634a77 Mon Sep 17 00:00:00 2001 From: Alex Marin Date: Tue, 29 Jul 2025 11:11:51 -0700 Subject: [PATCH 15/15] Update _get_latest_model_info implementation --- sdk/python/foundry_local/api.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/sdk/python/foundry_local/api.py b/sdk/python/foundry_local/api.py index 56a736a4..3c998161 100644 --- a/sdk/python/foundry_local/api.py +++ b/sdk/python/foundry_local/api.py @@ -256,20 +256,9 @@ def _get_latest_model_info(self, alias_or_model_id: str, raise_on_not_found: boo raise ValueError("The provided nodel alias or ID was empty.") return None - catalog = self._get_catalog_dict() - - # if alias or id without version - if ":" not in alias_or_model_id: - # if alias - if catalog[alias_or_model_id] is not None: - return catalog[alias_or_model_id] - else: - # if id without version, then get_model_info will get the latest version - return self.get_model_info(alias_or_model_id, raise_on_not_found) - else: - # if id with version, remove the ":" suffix and use the name to get the latest model - id_without_version = alias_or_model_id.split(":")[0] - return self.get_model_info(id_without_version, raise_on_not_found) + # 
remove the ":" suffix if it exists, and use it to get the latest model + alias_or_name_without_version = alias_or_model_id.split(":")[0] + return self.get_model_info(alias_or_name_without_version, raise_on_not_found) # Cache management api def get_cache_location(self):