From edd5e4eb4c544a6b9ee5e981c30c7794b39c84a6 Mon Sep 17 00:00:00 2001 From: sbaidachni Date: Wed, 19 Feb 2025 08:55:52 -0800 Subject: [PATCH 01/54] fix issues with git in container --- .buildcontainer/Dockerfile | 2 +- .github/workflows/build_devops_container.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 0f9acf9..0955ab6 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -4,7 +4,7 @@ ARG USERNAME=vscode USER root -RUN apt-get update -y && apt-get install -y sudo wget gnupg software-properties-common curl bash && \ +RUN apt-get update -y && apt-get install -y sudo wget gnupg software-properties-common curl bash git && \ # Download and install Miniconda wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ bash /tmp/miniconda.sh -b -p /opt/miniconda && \ diff --git a/.github/workflows/build_devops_container.yml b/.github/workflows/build_devops_container.yml index a52a287..8942519 100644 --- a/.github/workflows/build_devops_container.yml +++ b/.github/workflows/build_devops_container.yml @@ -6,7 +6,8 @@ on: - 'development' paths: - '.github/workflows/build_devops_container.yml' - - 'requirements.txt' + - 'requirements.txt' + - '.buildcontainer/Dockerfile' env: IMAGE_NAME: devops_container_image From 6f00218596925ee41d052faaaf749780ddffa866 Mon Sep 17 00:00:00 2001 From: Marisa <38594207+msa2984@users.noreply.github.com> Date: Mon, 24 Feb 2025 10:55:14 -0500 Subject: [PATCH 02/54] Adding PR Comment with results of evaluation. (#89) --- .github/workflows/ai_pull_pr_workflow.yml | 41 ++++++++++++++++++++++- mlops/evaluation/search_evaluation.py | 8 ++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index 08fd749..a0fa2d9 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -20,6 +20,7 @@ env: permissions: id-token: write contents: read + pull-requests: write jobs: build-validation: name: Build Validation @@ -76,4 +77,42 @@ jobs: python -u -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data.jsonl" --semantic_config my-semantic-config env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} - + + - name: Post PR comment with results of evaluation + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if ! command -v jq &> /dev/null; then + sudo apt-get update && sudo apt-get install -y jq + fi + + latest_results_file=$(ls -t results/ | head -n 1) + results=$(cat "results/${latest_results_file}") + + metrics=$(echo "$results" | jq -r '.metrics') + metric_keys=$(echo "$metrics" | jq -r 'keys[]') + metric_values=$(echo "$metrics" | jq -r 'values[]') + + metrics_table="## Search Evaluation Results \n \n" + metrics_table="${metrics_table}| Metric | Value |\n" + metrics_table="${metrics_table}| ------ | ----- |\n" + for key in $metric_keys; do + value=$(echo "$metrics" | jq -r --arg key "$key" '.[$key]') + metrics_table="${metrics_table}| $key | $value |\n" + done + + formatted_table=$(echo "$metrics_table") + + # Use the captured output in the curl command + curl -s -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + --data "$(jq -nc --arg body "$formatted_table" '{body: $body}')" \ + "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.number }}/comments" + + + - name: Remove results directory + run: | + if [ -d "results" ]; then + rm -rf results + fi + diff --git a/mlops/evaluation/search_evaluation.py b/mlops/evaluation/search_evaluation.py index a70fff1..a15b0de 100644 --- a/mlops/evaluation/search_evaluation.py +++ b/mlops/evaluation/search_evaluation.py @@ -67,6 +67,11 @@ def main(index_name: str, semantic_config: str, data_path: str): } } + # Create results directory if it does not exist + results_dir = "./results" + if not os.path.exists(results_dir): + os.makedirs(results_dir) + # Run evaluations results = evaluate( evaluation_name=experiment_name, @@ -78,7 +83,8 @@ def main(index_name: str, semantic_config: str, data_path: str): "subscription_id": subscription_id, "resource_group_name": resource_group, "project_name": project_name, - } + }, + output_path=f"{results_dir}/{experiment_name}.json", ) print(results["studio_url"]) From 512623cd2f7211d64649cfac463c51e2997fc714 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 28 Feb 2025 22:48:08 -0800 Subject: [PATCH 03/54] jq is a part of container --- .buildcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 0955ab6..01c2d31 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -4,7 +4,7 @@ ARG USERNAME=vscode USER root -RUN apt-get update -y && apt-get install -y sudo wget gnupg software-properties-common curl bash git && \ +RUN apt-get update -y && apt-get install -y sudo wget gnupg software-properties-common curl bash git jq && \ # Download and install Miniconda wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ bash /tmp/miniconda.sh -b -p /opt/miniconda && \ From f706fa4221842e35f467c1d120c2c9d549ac02a5 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 23 May 2025 11:43:19 -0700 Subject: [PATCH 04/54] adding initial telemetry --- .env.sample | 1 + mlops/deployment_scripts/deploy_azure_functions.py | 1 + 2 files changed, 2 insertions(+) diff --git a/.env.sample b/.env.sample index 7d5dce8..d270f24 100644 --- a/.env.sample +++ b/.env.sample @@ -7,3 +7,4 @@ AOAI_BASE_ENDPOINT= AI_STUDIO_PROJECT_NAME= MANAGED_IDENTITY_CLIENT_ID= MANAGED_IDENTITY_NAME= +ENABLE_TELEMETRY=true diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 189dd62..ba7048a 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -75,6 +75,7 @@ def _wait_for_functions_ready( params = {"api-version": FUNCTION_API_VERSION} headers = { "Content-Type": APPLICATION_JSON_CONTENT_TYPE, + "User-Agent": "acce1e78-98c3-42d1-b5fd-a5c2c365fbfe/1.0", "Accept": APPLICATION_JSON_CONTENT_TYPE, "Authorization": "Bearer {access_token}".format(access_token=access_token), } From cca4b708bb862cb16d2bb5c3fad520a64d3558f0 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Tue, 27 Jan 2026 12:36:26 -0800 Subject: [PATCH 05/54] fix upload_data on Windows --- .env.sample | 1 + config/config.yaml | 2 +- mlops/deployment_scripts/upload_data.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.env.sample b/.env.sample index d270f24..739b5d1 100644 --- a/.env.sample +++ b/.env.sample @@ -8,3 +8,4 @@ AI_STUDIO_PROJECT_NAME= MANAGED_IDENTITY_CLIENT_ID= MANAGED_IDENTITY_NAME= ENABLE_TELEMETRY=true +FUNCTION_APP_NAME= diff --git a/config/config.yaml b/config/config.yaml index 8d4c5a7..5563358 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -15,7 +15,7 @@ aoai_config: functions_config: function_names: ["Chunk", "Vector_Embed"] - function_app_name: aiskills-pull + function_app_name: ${FUNCTION_APP_NAME} # Azure Cognitive Service config acs_config: diff --git a/mlops/deployment_scripts/upload_data.py b/mlops/deployment_scripts/upload_data.py index c370d0b..7e4e3a0 100644 --- a/mlops/deployment_scripts/upload_data.py +++ b/mlops/deployment_scripts/upload_data.py @@ -6,6 +6,7 @@ from pathlib import Path import argparse +import os from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobServiceClient from mlops.common.config_utils import MLOpsConfig @@ -37,10 +38,10 @@ def _upload_ops_files( # construct blob name from file path # everything rather than local_folder - file_subpath = str(file).split(f"{local_folder}/")[1] + file_subpath = file.relative_to(local_folder) # generate a unique name of the file - file_name = file_subpath.replace("/", "_") + file_name = str(file_subpath).replace(os.sep, "_") try: print(f"Ready to copy: {str(file)} to {file_name}.") From 0562d3de916c6ea9d809104b509f5cc58db0de1f Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Tue, 27 Jan 2026 12:40:31 -0800 Subject: [PATCH 06/54] bump library version --- src/custom_skills/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/custom_skills/requirements.txt b/src/custom_skills/requirements.txt index b24a0a0..9cdb71a 100644 --- a/src/custom_skills/requirements.txt +++ b/src/custom_skills/requirements.txt @@ -2,7 +2,7 @@ # The Python Worker is managed by Azure Functions platform # Manually managing azure-functions-worker may cause unexpected issues -azure-core==1.29.5 +azure-core==1.38.0 azure-functions==1.17.0 azure-identity==1.16.1 azure-storage-blob==12.19.0 From 20a1d1bf30d3ea3b3a6d290c0fc8dd9178a28b38 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Tue, 27 Jan 2026 14:09:00 -0800 Subject: [PATCH 07/54] switching to cli for deployment --- mlops/common/function_utils.py | 2 - .../deploy_azure_functions.py | 140 +++++++----------- src/custom_skills/requirements.txt | 20 +-- 3 files changed, 61 insertions(+), 101 deletions(-) diff --git a/mlops/common/function_utils.py b/mlops/common/function_utils.py index e5cf7a9..7ff2814 100644 --- a/mlops/common/function_utils.py +++ b/mlops/common/function_utils.py @@ -17,8 +17,6 @@ def get_app_settings(config: dict, index_name: str): settings_dict["MANAGED_IDENTITY_CLIENT_ID"] = config.sub_config["managed_identity_client_id"] - settings_dict["ENABLE_ORYX_BUILD"] = "true" - settings_dict["SCM_DO_BUILD_DURING_DEPLOYMENT"] = "true" return settings_dict diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index ba7048a..12da5aa 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -4,10 +4,11 @@ import shutil import time import argparse +import subprocess from azure.identity import DefaultAzureCredential from azure.mgmt.web import WebSiteManagementClient -from azure.mgmt.web.v2023_01_01.models import Site +from azure.mgmt.web.models import Site from mlops.common.config_utils import MLOpsConfig from mlops.common.naming_utils import generate_slot_name, generate_index_name from mlops.common.function_utils import ( @@ -17,10 +18,6 @@ # Define the path to the Azure function directory APPLICATION_JSON_CONTENT_TYPE = "application/json" FUNCTION_API_VERSION = "2022-03-01" -DEPLOYMENT_APP_URL = "https://{function_app_name}.scm.azurewebsites.net/api/zipdeploy" -DEPLOYMENT_APP_URL_WITH_SLOT = ( - "https://{function_app_name}-{slot}.scm.azurewebsites.net/api/zipdeploy" -) MANAGEMENT_FUNCTION_URL = ( "https://management.azure.com/subscriptions/{subscription_id}" "/resourceGroups/{resource_group}" @@ -127,7 +124,6 @@ def _wait_for_functions_ready( def _deploy_functions( credential: DefaultAzureCredential, - deployment_url: str, subscription_id: str, resource_group_name: str, func_name: str, @@ -137,12 +133,6 @@ def _deploy_functions( credential=credential, subscription_id=subscription_id ) - # Generate access token header - access_token = credential.get_token(MANAGEMENT_SCOPE_URL).token - headers = { - "Content-Type": "application/zip", - "Authorization": "Bearer {access_token}".format(access_token=access_token), - } # Create a zip file of the Custom Skills directory zip_filename = shutil.make_archive( base_name="__customskills", @@ -150,42 +140,35 @@ def _deploy_functions( root_dir=CUSTOM_SKILLS_DIR, ) - # Define the payload for the REST API call - with open(zip_filename, "rb") as f: - payload = f.read() - try: - # Send a POST request to the Azure function app to deploy the zip file - requests.post(deployment_url, headers=headers, data=payload, timeout=60) + print(f"Deploying {zip_filename} to {func_name}...") + subprocess.run( + [ + "az", + "functionapp", + "deployment", + "source", + "config-zip", + "-g", + resource_group_name, + "-n", + func_name, + "--src", + zip_filename, + "--build-remote", + "true", + ], + check=True, + shell=True, + ) + except subprocess.CalledProcessError as e: + print(f"Error deploying function app: {e}") + raise except requests.exceptions.RequestException: print( "Request has been sent, but no response yet. Checking deployment status in the next step." ) - print("Looking for an active deployment.") - # look at existing app for a location - deployment_slots = app_mgmt_client.web_apps.list_deployments( - resource_group_name, func_name - ) - - current_slot = deployment_slots.next() - id = current_slot.id.split("/")[-1] - - print(f"Deployment id: {id}") - status = current_slot.status - - # get_deployment_slot returns 4 in the case of success and 1 for in-progress deployment. - while status != 4: - current_slot = app_mgmt_client.web_apps.get_deployment( - resource_group_name, func_name, id - ) - status = current_slot.status - if status == 1: - print("Deployment is in progress") - elif status != 4: - raise SystemExit(f"Unknown deployment status {status}") - time.sleep(10) - print("Updating Application settings.") existing_app_settings = app_mgmt_client.web_apps.list_application_settings( @@ -205,7 +188,6 @@ def _deploy_functions( def _deploy_functions_withslot( credential: DefaultAzureCredential, - deployment_url: str, subscription_id: str, resource_group_name: str, func_name: str, @@ -216,12 +198,6 @@ def _deploy_functions_withslot( credential=credential, subscription_id=subscription_id ) - # Generate access token header - access_token = credential.get_token(MANAGEMENT_SCOPE_URL).token - headers = { - "Content-Type": "application/zip", - "Authorization": "Bearer {access_token}".format(access_token=access_token), - } print(f"slot name is {slot_name}") # Create a zip file of the Custom Skills directory zip_filename = shutil.make_archive( @@ -230,41 +206,36 @@ def _deploy_functions_withslot( root_dir=CUSTOM_SKILLS_DIR, ) - # Define the payload for the REST API call - with open(zip_filename, "rb") as f: - payload = f.read() - try: - # Send a POST request to the Azure function app to deploy the zip file - requests.post(deployment_url, headers=headers, data=payload, timeout=60) + print(f"Deploying {zip_filename} to {func_name} with slot {slot_name}...") + subprocess.run( + [ + "az", + "functionapp", + "deployment", + "source", + "config-zip", + "-g", + resource_group_name, + "-n", + func_name, + "--src", + zip_filename, + "--build-remote", + "true", + "--slot", + slot_name, + ], + check=True, + shell=True, + ) + except subprocess.CalledProcessError as e: + print(f"Error deploying function app: {e}") + raise except requests.exceptions.RequestException: print( "Request has been sent, but no response yet. Checking deployment status in the next step." ) - # raise SystemExit(e) - - print("Looking for an active deployment.") - # look at existing app for a location - deployment_slots = app_mgmt_client.web_apps.list_deployments_slot( - resource_group_name, func_name, slot_name - ) - current_slot = deployment_slots.next() - id = current_slot.id.split("/")[-1] - - print(f"Deployment id: {id}") - status = current_slot.status - - # get_deployment_slot returns 4 in the case of success and 1 for in-progress deployment. - while status != 4: - current_slot = app_mgmt_client.web_apps.get_deployment_slot( - resource_group_name, func_name, id, slot_name - ) - status = current_slot.status - if status == 1: - print("Deployment is in progress") - elif status != 4: - raise SystemExit(f"Unknown deployment status {status}") - time.sleep(10) print("Updating Application settings.") existing_app_settings = app_mgmt_client.web_apps.list_application_settings_slot( @@ -314,23 +285,15 @@ def main(): app_settings = get_app_settings(config, generate_index_name()) # deploying or updating the slot - if slot_name is None: - deployment_url = DEPLOYMENT_APP_URL.format(function_app_name=function_app_name) - else: + if slot_name is not None: print("Creating a deployment slot.") _create_or_update_deployment_slot( credential, subscription_id, resource_group, function_app_name, slot_name ) - deployment_url = DEPLOYMENT_APP_URL_WITH_SLOT.format( - function_app_name=function_app_name, slot=slot_name - ) - - print(f"Deploying to: {deployment_url}") if slot_name is None: _deploy_functions( credential, - deployment_url, subscription_id, resource_group, function_app_name, @@ -339,7 +302,6 @@ def main(): else: _deploy_functions_withslot( credential, - deployment_url, subscription_id, resource_group, function_app_name, diff --git a/src/custom_skills/requirements.txt b/src/custom_skills/requirements.txt index 9cdb71a..e565ee5 100644 --- a/src/custom_skills/requirements.txt +++ b/src/custom_skills/requirements.txt @@ -2,17 +2,17 @@ # The Python Worker is managed by Azure Functions platform # Manually managing azure-functions-worker may cause unexpected issues -azure-core==1.38.0 -azure-functions==1.17.0 -azure-identity==1.16.1 -azure-storage-blob==12.19.0 -jsonschema==4.19.2 +azure-core +azure-functions +azure-identity +azure-storage-blob +jsonschema openai -python-dotenv==1.0.0 -tenacity==8.2.3 -tiktoken==0.5.1 -numexpr==2.8.7 -azure-search-documents==11.6.0b5 +python-dotenv +tenacity +tiktoken +numexpr +azure-search-documents langchain-text-splitters langchain_community pypdf \ No newline at end of file From 247968894a0fecc8553783c777b6c60eadac6e26 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Tue, 27 Jan 2026 14:37:50 -0800 Subject: [PATCH 08/54] switch to function auth (can be enabled with AAD at the same time) --- src/custom_skills/function_app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/custom_skills/function_app.py b/src/custom_skills/function_app.py index 2e5f579..6dc44f8 100644 --- a/src/custom_skills/function_app.py +++ b/src/custom_skills/function_app.py @@ -7,7 +7,7 @@ app = func.FunctionApp() -@app.route("Health", auth_level=func.AuthLevel.ANONYMOUS) +@app.route("Health", auth_level=func.AuthLevel.FUNCTION) def health_check(req: func.HttpRequest) -> func.HttpResponse: """Check health of the function.""" version = 1 @@ -15,13 +15,13 @@ def health_check(req: func.HttpRequest) -> func.HttpResponse: return func.HttpResponse(f"This function executed successfully with version {version}.", status_code=200) -@app.route("Chunk", auth_level=func.AuthLevel.ANONYMOUS) +@app.route("Chunk", auth_level=func.AuthLevel.FUNCTION) def chunk(req: func.HttpRequest) -> func.HttpResponse: """Divide document into chunks of text.""" return function_chunk(req) -@app.route("Vector_Embed", auth_level=func.AuthLevel.ANONYMOUS) +@app.route("Vector_Embed", auth_level=func.AuthLevel.FUNCTION) def vector_embed(req: func.HttpRequest) -> func.HttpResponse: """Convert text to vector embedding.""" return function_vector_embed(req) From cabea9fe53b402ac060005459f022127257d5242 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Wed, 28 Jan 2026 15:32:59 -0800 Subject: [PATCH 09/54] switch to managed identity --- mlops/acs_config/documentDataSource.json | 4 +++ mlops/acs_config/documentIndex.json | 6 ++++- mlops/deployment_scripts/build_indexer.py | 31 +++++++++++++++-------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/mlops/acs_config/documentDataSource.json b/mlops/acs_config/documentDataSource.json index 5e4eba9..2a08e79 100644 --- a/mlops/acs_config/documentDataSource.json +++ b/mlops/acs_config/documentDataSource.json @@ -6,6 +6,10 @@ "credentials": { "connectionString": "{connection_string}" }, + "identity": { + "@odata.type": "#Microsoft.Azure.Search.DataUserAssignedIdentity", + "userAssignedIdentity": "{identity_connection_string}" + }, "container": { "name": "{container_name}", "query": null diff --git a/mlops/acs_config/documentIndex.json b/mlops/acs_config/documentIndex.json index 0d8fe13..6e734cf 100644 --- a/mlops/acs_config/documentIndex.json +++ b/mlops/acs_config/documentIndex.json @@ -156,7 +156,11 @@ "azureOpenAIParameters": { "resourceUri": "{openai_api_endpoint}", "deploymentId": "{openai_embedding_model}", - "modelName": "text-embedding-ada-002" + "modelName": "text-embedding-ada-002", + "authIdentity": { + "@odata.type": "#Microsoft.Azure.Search.DataUserAssignedIdentity", + "userAssignedIdentity": "{identity_connection_string}" + } } } ] diff --git a/mlops/deployment_scripts/build_indexer.py b/mlops/deployment_scripts/build_indexer.py index 2518ac4..0b80b60 100644 --- a/mlops/deployment_scripts/build_indexer.py +++ b/mlops/deployment_scripts/build_indexer.py @@ -34,6 +34,7 @@ def _create_or_update_search_index( file_name: str, bearer_token: str, api_version: str, + identity_resource_id: str, ) -> None: # Use the REST API, there is a bug in the Search SDK that prevents creating the Vector field correctly @@ -57,6 +58,7 @@ def _create_or_update_search_index( aoai_config["aoai_embedding_model_deployment"], ) index_def = index_def.replace("{openai_embedding_model}", aoai_config["aoai_embedding_model_deployment"]) + index_def = index_def.replace("{identity_connection_string}", identity_resource_id) response = requests.put( url=index_url, data=index_def, params=params, headers=headers @@ -108,21 +110,22 @@ def _get_identity_resource( resource_group_name: str, managed_identity_name: str ) -> str: - resource_string = f"/subscriptions/{subscription_id}/resourcegroups/{resource_group_name}" \ + resource_string = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}" \ f"/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{managed_identity_name}" return resource_string def _generate_data_source_connection( - connection_name: str, file_name: str, conn_string: str, container: str + connection_name: str, file_name: str, conn_string: str, identity_conn_string: str, container: str ): with open(file_name) as data_source_file: data_source_def = data_source_file.read() - data_source_def = data_source_def.replace("{conn_string}", conn_string) + data_source_def = data_source_def.replace("{connection_string}", conn_string) data_source_def = data_source_def.replace("{container_name}", container) data_source_def = data_source_def.replace("{name}", connection_name) + data_source_def = data_source_def.replace("{identity_connection_string}", identity_conn_string) data_source_connection = SearchIndexerDataSourceConnection.deserialize( data_source_def, APPLICATION_JSON_CONTENT_TYPE ) @@ -227,7 +230,19 @@ def main(): # Get the token bearer_token = credential.get_token(aisearch_scope).token - # Create the full document index + conn_string = _get_storage_conn_string( + sub_config["subscription_id"], + sub_config["storage_account_name"], + sub_config["resource_group_name"], + ) + + identity_string = _get_identity_resource( + sub_config["subscription_id"], + sub_config["resource_group_name"], + sub_config["managed_identity_name"] + ) + + # Create the full document index _create_or_update_search_index( aoai_config, search_service_name=acs_config["acs_service_name"], @@ -235,12 +250,7 @@ def main(): file_name=acs_config["acs_document_index_file"], bearer_token=bearer_token, api_version=acs_config["acs_api_version"], - ) - - conn_string = _get_storage_conn_string( - sub_config["subscription_id"], - sub_config["storage_account_name"], - sub_config["resource_group_name"], + identity_resource_id=identity_string ) search_indexer_client = SearchIndexerClient( @@ -257,6 +267,7 @@ def main(): generate_data_source_name(), file_name=acs_config["acs_document_data_source"], conn_string=conn_string, + identity_conn_string=identity_string, container=storage_container, ) search_indexer_client.create_or_update_data_source_connection( From 1603d35985c5dbd53e85d18fed775fa7a67c2c92 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Wed, 28 Jan 2026 20:27:05 -0800 Subject: [PATCH 10/54] better retry logic --- src/custom_skills/VectorEmbed/__init__.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/custom_skills/VectorEmbed/__init__.py b/src/custom_skills/VectorEmbed/__init__.py index 32e237b..179015e 100644 --- a/src/custom_skills/VectorEmbed/__init__.py +++ b/src/custom_skills/VectorEmbed/__init__.py @@ -3,14 +3,12 @@ import logging import json import jsonschema -import openai from azure.identity import DefaultAzureCredential from openai import AzureOpenAI from tenacity import ( retry, stop_after_attempt, - wait_random_exponential, - retry_if_exception_type + wait_random_exponential ) REQUEST_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "request_schema.json") @@ -76,9 +74,7 @@ def _log_attempt_number(retry_state): print(f"Rate Limit Exceeded! Retry Attempt #: {retry_state.attempt_number} | Chunk: {row}") -@retry(retry=retry_if_exception_type(openai.RateLimitError), - wait=wait_random_exponential(min=1, max=60), - stop=stop_after_attempt(10), after=_log_attempt_number) +@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(10), after=_log_attempt_number) def _generate_embedding(text, aoai_token): """ Generate embeddings for text. From 44069952196ae3200679ac606451e194029adb57 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Wed, 28 Jan 2026 21:01:19 -0800 Subject: [PATCH 11/54] fix evaluation (latest sdk notation) --- .env.sample | 2 +- .github/workflows/ai_pull_ci_workflow.yml | 2 +- .github/workflows/ai_pull_pr_workflow.yml | 2 +- .gitignore | 2 ++ README.md | 2 +- mlops/evaluation/search_evaluation.py | 12 ++++-------- 6 files changed, 10 insertions(+), 12 deletions(-) diff --git a/.env.sample b/.env.sample index 739b5d1..9129181 100644 --- a/.env.sample +++ b/.env.sample @@ -4,7 +4,7 @@ RESOURCE_GROUP_NAME= STORAGE_ACCOUNT_NAME= ACS_SERVICE_NAME= AOAI_BASE_ENDPOINT= -AI_STUDIO_PROJECT_NAME= +AI_FOUNDRY_PROJECT_URI="https://${AI_FOUNDRY_NAME}.services.ai.azure.com/api/projects/${PROJECT_NAME}" MANAGED_IDENTITY_CLIENT_ID= MANAGED_IDENTITY_NAME= ENABLE_TELEMETRY=true diff --git a/.github/workflows/ai_pull_ci_workflow.yml b/.github/workflows/ai_pull_ci_workflow.yml index c86224c..66ff003 100644 --- a/.github/workflows/ai_pull_ci_workflow.yml +++ b/.github/workflows/ai_pull_ci_workflow.yml @@ -14,7 +14,7 @@ env: STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCOUNT_NAME }} ACS_SERVICE_NAME: ${{ vars.ACS_SERVICE_NAME }} AOAI_BASE_ENDPOINT: ${{ vars.AOAI_BASE_ENDPOINT }} - AI_STUDIO_PROJECT_NAME: ${{ vars.AI_STUDIO_PROJECT_NAME }} + AI_FOUNDRY_PROJECT_URI: ${{ vars.AI_FOUNDRY_PROJECT_URI }} MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index a0fa2d9..a25342a 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -12,7 +12,7 @@ env: STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCOUNT_NAME }} ACS_SERVICE_NAME: ${{ vars.ACS_SERVICE_NAME }} AOAI_BASE_ENDPOINT: ${{ vars.AOAI_BASE_ENDPOINT }} - AI_STUDIO_PROJECT_NAME: ${{ vars.AI_STUDIO_PROJECT_NAME }} + AI_FOUNDRY_PROJECT_URI: ${{ vars.AI_FOUNDRY_PROJECT_URI }} MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} diff --git a/.gitignore b/.gitignore index 5c74b11..713005f 100644 --- a/.gitignore +++ b/.gitignore @@ -417,3 +417,5 @@ local.settings.json # Other .DS_Store + +results diff --git a/README.md b/README.md index d4b5586..302aefd 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ Some variables and secrets should be provided to execute the github workflows (p - storage_account_name - acs_service_name - aoai_base_endpoint -- ai_studio_project_name +- ai_foundry_project_uri ## Related Projects diff --git a/mlops/evaluation/search_evaluation.py b/mlops/evaluation/search_evaluation.py index a15b0de..587aff4 100644 --- a/mlops/evaluation/search_evaluation.py +++ b/mlops/evaluation/search_evaluation.py @@ -27,7 +27,7 @@ def main(index_name: str, semantic_config: str, data_path: str): subscription_id = os.environ.get("SUBSCRIPTION_ID") resource_group = os.environ.get("RESOURCE_GROUP_NAME") - project_name = os.environ.get("AI_STUDIO_PROJECT_NAME") + project_name = os.environ.get("AI_FOUNDRY_PROJECT_URI") azure_search_service_name = os.environ.get("ACS_SERVICE_NAME") azure_search_endpoint = f"https://{azure_search_service_name}.search.windows.net" @@ -68,7 +68,7 @@ def main(index_name: str, semantic_config: str, data_path: str): } # Create results directory if it does not exist - results_dir = "./results" + results_dir = os.path.join(os.getcwd(), "results") if not os.path.exists(results_dir): os.makedirs(results_dir) @@ -79,12 +79,8 @@ def main(index_name: str, semantic_config: str, data_path: str): target=target, evaluators=evaluators, evaluator_config=evaluators_config, - azure_ai_project={ - "subscription_id": subscription_id, - "resource_group_name": resource_group, - "project_name": project_name, - }, - output_path=f"{results_dir}/{experiment_name}.json", + azure_ai_project=project_name, + output_path=os.path.join(results_dir, f"{experiment_name}.json"), ) print(results["studio_url"]) From 7013c50e7e35d3d5c2e100ea0970b9f55d64a2a1 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Wed, 28 Jan 2026 21:47:42 -0800 Subject: [PATCH 12/54] test login --- .github/workflows/build_devops_container.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_devops_container.yml b/.github/workflows/build_devops_container.yml index 8942519..5dfc351 100644 --- a/.github/workflows/build_devops_container.yml +++ b/.github/workflows/build_devops_container.yml @@ -35,6 +35,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: + auth-type: IDENTITY client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} From 8fd606f0a4620def26496108920ec4469383209f Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 11:00:09 -0800 Subject: [PATCH 13/54] separate identities --- .github/workflows/ai_pull_ci_workflow.yml | 3 ++- .github/workflows/ai_pull_pr_workflow.yml | 9 +++++---- .github/workflows/build_devops_container.yml | 5 ++--- .github/workflows/data_initialization_workflow.yml | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ai_pull_ci_workflow.yml b/.github/workflows/ai_pull_ci_workflow.yml index 66ff003..82aec51 100644 --- a/.github/workflows/ai_pull_ci_workflow.yml +++ b/.github/workflows/ai_pull_ci_workflow.yml @@ -18,6 +18,7 @@ env: MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} permissions: id-token: write @@ -39,7 +40,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index a25342a..9b96b70 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -16,6 +16,7 @@ env: MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} permissions: id-token: write @@ -46,28 +47,28 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} - name: Execute Azure Functions Deployment shell: bash run: | - python -u -m mlops.deployment_scripts.deploy_azure_functions + python -u -m mlops.deployment_scripts.deploy_azure_functions --ignore_slot env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} - name: Validate Azure Functions Deployment shell: bash run: | - python -u -m mlops.deployment_scripts.run_functions + python -u -m mlops.deployment_scripts.run_functions --ignore_slot env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} - name: Deploy Indexer shell: bash run: | - python -u -m mlops.deployment_scripts.build_indexer + python -u -m mlops.deployment_scripts.build_indexer --ignore_slot env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} diff --git a/.github/workflows/build_devops_container.yml b/.github/workflows/build_devops_container.yml index 5dfc351..2e6a08c 100644 --- a/.github/workflows/build_devops_container.yml +++ b/.github/workflows/build_devops_container.yml @@ -13,7 +13,7 @@ env: IMAGE_NAME: devops_container_image SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} RESOURCE_GROUP_NAME: ${{ vars.RESOURCE_GROUP_NAME }} - MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} ACR_CONTAINER_REGISTRY: ${{ vars.ACR_CONTAINER_REGISTRY }} @@ -35,8 +35,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: - auth-type: IDENTITY - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} diff --git a/.github/workflows/data_initialization_workflow.yml b/.github/workflows/data_initialization_workflow.yml index 0856e5c..0675d06 100644 --- a/.github/workflows/data_initialization_workflow.yml +++ b/.github/workflows/data_initialization_workflow.yml @@ -9,7 +9,7 @@ env: SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} RESOURCE_GROUP_NAME: ${{ vars.RESOURCE_GROUP_NAME }} STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCOUNT_NAME }} - MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} permissions: @@ -32,7 +32,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} From 4656d6e33f911e8382315dd61e7a91a9ee73ab24 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:02:45 -0800 Subject: [PATCH 14/54] updating dependencies --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4f71e83..4d4c3fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,7 @@ pytest-mock==3.12.0 pytest==7.4.0 azure-mgmt-authorization>=4.0.0 python-dotenv>=0.10.3 -azure-mgmt-search==9.1.0 -azure-mgmt-storage==21.1.0 -azure-search-documents==11.6.0b5 +azure-mgmt-search +azure-mgmt-storage +azure-search-documents azure-ai-evaluation From 07840cddfc728fe26bcfa11ca517a26e5bbeb8aa Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:08:25 -0800 Subject: [PATCH 15/54] fix requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4d4c3fa..5bba143 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ flake8-docstrings==1.7.0 flake8==6.1.0 pep8-naming==0.13.3 pytest-cov==4.1.0 -pytest-azurepipelines==1.0.5 +# pytest-azurepipelines==1.0.5 pytest-mock==3.12.0 pytest==7.4.0 azure-mgmt-authorization>=4.0.0 From cf71b9ca1a09932fc1a370a689af85af3bdf2213 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:15:36 -0800 Subject: [PATCH 16/54] one more fix --- .buildcontainer/Dockerfile | 2 +- requirements.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 01c2d31..2459f36 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -22,7 +22,7 @@ ENV PATH=/opt/miniconda/bin:$PATH WORKDIR /home/$USERNAME # Install Azure CLI -RUN sudo apt-get update && sudo apt-get install -y gnupg software-properties-common curl && \ +RUN sudo apt-get update && sudo apt-get install -y gnupg software-properties-common curl build-essential && \ curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ az config set extension.use_dynamic_install=yes_without_prompt && \ az extension add -n ml diff --git a/requirements.txt b/requirements.txt index 5bba143..f730753 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,6 @@ pytest-cov==4.1.0 pytest-mock==3.12.0 pytest==7.4.0 azure-mgmt-authorization>=4.0.0 -python-dotenv>=0.10.3 azure-mgmt-search azure-mgmt-storage azure-search-documents From 063306ba72b6a60f9a2d671c83d788c0859b0d81 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:21:33 -0800 Subject: [PATCH 17/54] relaxing requirements --- .buildcontainer/Dockerfile | 3 ++- requirements.txt | 13 ++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 2459f36..7d2f421 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -30,7 +30,8 @@ RUN sudo apt-get update && sudo apt-get install -y gnupg software-properties-com COPY requirements.txt . # Create Conda environment and install Python dependencies -RUN conda create -n llm-env python=3.12 pip=23.2 -q -y && \ +RUN conda create -n llm-env python=3.12 -q -y && \ + conda run -n llm-env pip install --upgrade pip && \ conda run -n llm-env pip install -r requirements.txt && \ conda clean -a -y && \ conda run -n llm-env pip list diff --git a/requirements.txt b/requirements.txt index f730753..f707f73 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,13 +5,12 @@ azure-functions>=1.17.0 azure-storage-blob>=12.19.0 azure-mgmt-web>=7.2.0 pyyaml -flake8-docstrings==1.7.0 -flake8==6.1.0 -pep8-naming==0.13.3 -pytest-cov==4.1.0 -# pytest-azurepipelines==1.0.5 -pytest-mock==3.12.0 -pytest==7.4.0 +flake8-docstrings>=1.7.0 +flake8>=6.1.0 +pep8-naming>=0.13.3 +pytest-cov>=4.1.0 +pytest-mock>=3.12.0 +pytest>=7.4.0 azure-mgmt-authorization>=4.0.0 azure-mgmt-search azure-mgmt-storage From 2cd04dc6f441eff0d52dcbcee73169b72ecb3053 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:35:11 -0800 Subject: [PATCH 18/54] spliting run --- .buildcontainer/Dockerfile | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 7d2f421..8cd6a4d 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -29,11 +29,17 @@ RUN sudo apt-get update && sudo apt-get install -y gnupg software-properties-com COPY requirements.txt . -# Create Conda environment and install Python dependencies -RUN conda create -n llm-env python=3.12 -q -y && \ - conda run -n llm-env pip install --upgrade pip && \ - conda run -n llm-env pip install -r requirements.txt && \ - conda clean -a -y && \ +# Create Conda environment +RUN conda create -n llm-env python=3.12 -q -y + +# Upgrade pip and build tools +RUN conda run -n llm-env pip install --upgrade pip setuptools wheel + +# Install dependencies +RUN conda run -n llm-env pip install -r requirements.txt + +# Cleanup and list +RUN conda clean -a -y && \ conda run -n llm-env pip list RUN echo "conda activate llm-env" >> /home/$USERNAME/.bashrc From 02acc02460c6563e2d68b055f1cbc5629f2ee01d Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:44:49 -0800 Subject: [PATCH 19/54] switching to ms image --- .buildcontainer/Dockerfile | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 8cd6a4d..2d028b9 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -1,36 +1,26 @@ -FROM ubuntu:22.04 +FROM mcr.microsoft.com/devcontainers/miniconda:3 ARG USERNAME=vscode USER root -RUN apt-get update -y && apt-get install -y sudo wget gnupg software-properties-common curl bash git jq && \ - # Download and install Miniconda - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ - bash /tmp/miniconda.sh -b -p /opt/miniconda && \ - rm /tmp/miniconda.sh && \ - # Create a non-root user - useradd -m -s /bin/bash $USERNAME && \ - echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME && \ - chmod 0440 /etc/sudoers.d/$USERNAME && \ - # Change ownership of Miniconda to the non-root user - chown -R $USERNAME:$USERNAME /opt/miniconda - -ENV PATH=/opt/miniconda/bin:$PATH - -# Install dependencies -WORKDIR /home/$USERNAME +# Install dependencies (Base image has git, curl, jq, sudo) +RUN apt-get update && apt-get install -y build-essential software-properties-common # Install Azure CLI -RUN sudo apt-get update && sudo apt-get install -y gnupg software-properties-common curl build-essential && \ - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ +RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ az config set extension.use_dynamic_install=yes_without_prompt && \ az extension add -n ml +# Setup User (vscode user already exists in devcontainer image) +USER $USERNAME +WORKDIR /home/$USERNAME + COPY requirements.txt . -# Create Conda environment -RUN conda create -n llm-env python=3.12 -q -y +# Configure Conda and create environment +RUN conda config --add channels conda-forge && \ + conda create -n llm-env python=3.12 -y # Upgrade pip and build tools RUN conda run -n llm-env pip install --upgrade pip setuptools wheel @@ -44,6 +34,6 @@ RUN conda clean -a -y && \ RUN echo "conda activate llm-env" >> /home/$USERNAME/.bashrc -ENV PATH=/opt/miniconda/envs/llm-env/bin:$PATH +ENV PATH=/opt/conda/envs/llm-env/bin:$PATH CMD ["conda", "run", "-n", "llm-env", "python", "--version"] \ No newline at end of file From d902c78d53efa77a535e54fa7be1b1a394fb1bac Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 13:56:17 -0800 Subject: [PATCH 20/54] switch to non0interactive mode --- .buildcontainer/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 2d028b9..0c481a3 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -5,7 +5,8 @@ ARG USERNAME=vscode USER root # Install dependencies (Base image has git, curl, jq, sudo) -RUN apt-get update && apt-get install -y build-essential software-properties-common +RUN apt-get update && export DEBIAN_FRONTEND=noninteractive && \ + apt-get install -y build-essential software-properties-common gnupg # Install Azure CLI RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ From c9fcce2e1331ce689cf6faf0c6ab472207086390 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 14:02:57 -0800 Subject: [PATCH 21/54] switched to simpler image --- .buildcontainer/Dockerfile | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 0c481a3..989e6fe 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -1,40 +1,30 @@ -FROM mcr.microsoft.com/devcontainers/miniconda:3 +FROM mcr.microsoft.com/devcontainers/python:3.12 ARG USERNAME=vscode USER root -# Install dependencies (Base image has git, curl, jq, sudo) -RUN apt-get update && export DEBIAN_FRONTEND=noninteractive && \ - apt-get install -y build-essential software-properties-common gnupg - # Install Azure CLI RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ az config set extension.use_dynamic_install=yes_without_prompt && \ az extension add -n ml -# Setup User (vscode user already exists in devcontainer image) +# Setup User USER $USERNAME WORKDIR /home/$USERNAME COPY requirements.txt . -# Configure Conda and create environment -RUN conda config --add channels conda-forge && \ - conda create -n llm-env python=3.12 -y - -# Upgrade pip and build tools -RUN conda run -n llm-env pip install --upgrade pip setuptools wheel - -# Install dependencies -RUN conda run -n llm-env pip install -r requirements.txt - -# Cleanup and list -RUN conda clean -a -y && \ - conda run -n llm-env pip list +# Create virtual environment and install dependencies +# We use a venv named 'llm-env' to maintain consistency with previous conda setup +RUN python -m venv llm-env && \ + /home/$USERNAME/llm-env/bin/pip install --upgrade pip setuptools wheel && \ + /home/$USERNAME/llm-env/bin/pip install -r requirements.txt -RUN echo "conda activate llm-env" >> /home/$USERNAME/.bashrc +# Configure shell to use the environment +RUN echo "source /home/$USERNAME/llm-env/bin/activate" >> /home/$USERNAME/.bashrc -ENV PATH=/opt/conda/envs/llm-env/bin:$PATH +# Add venv to PATH +ENV PATH="/home/$USERNAME/llm-env/bin:$PATH" -CMD ["conda", "run", "-n", "llm-env", "python", "--version"] \ No newline at end of file +CMD ["python", "--version"] \ No newline at end of file From 0faf6264d2f100d8dde4efb3cc7377d9bdb22eb6 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 14:05:03 -0800 Subject: [PATCH 22/54] removed ml extension --- .buildcontainer/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 989e6fe..494b7f9 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -6,8 +6,7 @@ USER root # Install Azure CLI RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ - az config set extension.use_dynamic_install=yes_without_prompt && \ - az extension add -n ml + az config set extension.use_dynamic_install=yes_without_prompt # Setup User USER $USERNAME From 1c5748f1fa3187ccb9ffbf1df33d12ba98a48489 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Thu, 29 Jan 2026 14:28:23 -0800 Subject: [PATCH 23/54] install cli using pip --- .buildcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 494b7f9..56d9242 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -5,7 +5,7 @@ ARG USERNAME=vscode USER root # Install Azure CLI -RUN curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ +RUN pip install azure-cli && \ az config set extension.use_dynamic_install=yes_without_prompt # Setup User From a3fff18fb0ea890f7b87de8c134d0d331d80e16c Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 10:11:29 -0800 Subject: [PATCH 24/54] function name intro --- .github/workflows/ai_pull_ci_workflow.yml | 1 + .github/workflows/ai_pull_pr_workflow.yml | 1 + mlops/deployment_scripts/build_indexer.py | 2 +- mlops/evaluation/search_evaluation.py | 2 -- 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ai_pull_ci_workflow.yml b/.github/workflows/ai_pull_ci_workflow.yml index 82aec51..5155034 100644 --- a/.github/workflows/ai_pull_ci_workflow.yml +++ b/.github/workflows/ai_pull_ci_workflow.yml @@ -19,6 +19,7 @@ env: MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} + FUNCTIONS_APP_NAME: ${{ vars.FUNCTIONS_APP_NAME }} permissions: id-token: write diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index 9b96b70..70fc4fc 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -17,6 +17,7 @@ env: MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} + FUNCTIONS_APP_NAME: ${{ vars.FUNCTIONS_APP_NAME }} permissions: id-token: write diff --git a/mlops/deployment_scripts/build_indexer.py b/mlops/deployment_scripts/build_indexer.py index 0b80b60..f67f329 100644 --- a/mlops/deployment_scripts/build_indexer.py +++ b/mlops/deployment_scripts/build_indexer.py @@ -242,7 +242,7 @@ def main(): sub_config["managed_identity_name"] ) - # Create the full document index + # Create the full document index _create_or_update_search_index( aoai_config, search_service_name=acs_config["acs_service_name"], diff --git a/mlops/evaluation/search_evaluation.py b/mlops/evaluation/search_evaluation.py index 587aff4..ad2228e 100644 --- a/mlops/evaluation/search_evaluation.py +++ b/mlops/evaluation/search_evaluation.py @@ -25,8 +25,6 @@ def main(index_name: str, semantic_config: str, data_path: str): """ experiment_name = generate_experiment_name(index_name) - subscription_id = os.environ.get("SUBSCRIPTION_ID") - resource_group = os.environ.get("RESOURCE_GROUP_NAME") project_name = os.environ.get("AI_FOUNDRY_PROJECT_URI") azure_search_service_name = os.environ.get("ACS_SERVICE_NAME") azure_search_endpoint = f"https://{azure_search_service_name}.search.windows.net" From 3759e35f470fe469e7e44d3c5c67d06e062d64db Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 10:17:22 -0800 Subject: [PATCH 25/54] fix typo --- .github/workflows/ai_pull_ci_workflow.yml | 2 +- .github/workflows/ai_pull_pr_workflow.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ai_pull_ci_workflow.yml b/.github/workflows/ai_pull_ci_workflow.yml index 5155034..6d62276 100644 --- a/.github/workflows/ai_pull_ci_workflow.yml +++ b/.github/workflows/ai_pull_ci_workflow.yml @@ -19,7 +19,7 @@ env: MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} - FUNCTIONS_APP_NAME: ${{ vars.FUNCTIONS_APP_NAME }} + FUNCTION_APP_NAME: ${{ vars.FUNCTION_APP_NAME }} permissions: id-token: write diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index 70fc4fc..608ad37 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -17,7 +17,7 @@ env: MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} - FUNCTIONS_APP_NAME: ${{ vars.FUNCTIONS_APP_NAME }} + FUNCTION_APP_NAME: ${{ vars.FUNCTION_APP_NAME }} permissions: id-token: write From 46dcca2ed69a1b202c30f23bbef06ce0b66fe597 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 10:24:52 -0800 Subject: [PATCH 26/54] smaller dataset --- .github/workflows/ai_pull_pr_workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index 608ad37..bcd7c80 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -76,7 +76,7 @@ jobs: - name: Execute search evaluation shell: bash run: | - python -u -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data.jsonl" --semantic_config my-semantic-config + python -u -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data_sample.jsonl" --semantic_config my-semantic-config env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} From 53a2e8daa18e2f924b3eb282cf5c02165c740d79 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 10:32:56 -0800 Subject: [PATCH 27/54] image as variable --- .github/workflows/ai_pull_ci_workflow.yml | 2 +- .github/workflows/ai_pull_pr_workflow.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ai_pull_ci_workflow.yml b/.github/workflows/ai_pull_ci_workflow.yml index 6d62276..1bf39da 100644 --- a/.github/workflows/ai_pull_ci_workflow.yml +++ b/.github/workflows/ai_pull_ci_workflow.yml @@ -29,7 +29,7 @@ jobs: name: Deployment and Evaluation runs-on: ubuntu-latest container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index bcd7c80..5e597dd 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -35,7 +35,7 @@ jobs: name: Deployment and Evaluation runs-on: ubuntu-latest container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} From 9164766e0fd0a1f2b8b1c630d739fa1158a0f964 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 10:45:31 -0800 Subject: [PATCH 28/54] more container fixes --- .github/workflows/build_devops_container.yml | 2 +- .github/workflows/build_validation_workflow.yml | 2 +- .github/workflows/data_initialization_workflow.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_devops_container.yml b/.github/workflows/build_devops_container.yml index 2e6a08c..159e1b9 100644 --- a/.github/workflows/build_devops_container.yml +++ b/.github/workflows/build_devops_container.yml @@ -10,7 +10,7 @@ on: - '.buildcontainer/Dockerfile' env: - IMAGE_NAME: devops_container_image + IMAGE_NAME: ${{ vars.IMAGE_NAME }} SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} RESOURCE_GROUP_NAME: ${{ vars.RESOURCE_GROUP_NAME }} FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index bc43eec..4615a3b 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -13,7 +13,7 @@ jobs: run-unit-tests: runs-on: ubuntu-latest container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} diff --git a/.github/workflows/data_initialization_workflow.yml b/.github/workflows/data_initialization_workflow.yml index 0675d06..e27fbaf 100644 --- a/.github/workflows/data_initialization_workflow.yml +++ b/.github/workflows/data_initialization_workflow.yml @@ -20,7 +20,7 @@ jobs: upload-data: runs-on: ubuntu-latest container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} From 0e9be7d81b6c0c680426fb7d1fc8d132739b3072 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:25:36 -0800 Subject: [PATCH 29/54] telemetry completion --- .env.sample | 1 - README.md | 10 ++++++---- config/config.yaml | 2 ++ mlops/common/config_utils.py | 4 ++++ mlops/deployment_scripts/deploy_azure_functions.py | 12 +++++++++++- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/.env.sample b/.env.sample index 9129181..2a369f9 100644 --- a/.env.sample +++ b/.env.sample @@ -7,5 +7,4 @@ AOAI_BASE_ENDPOINT= AI_FOUNDRY_PROJECT_URI="https://${AI_FOUNDRY_NAME}.services.ai.azure.com/api/projects/${PROJECT_NAME}" MANAGED_IDENTITY_CLIENT_ID= MANAGED_IDENTITY_NAME= -ENABLE_TELEMETRY=true FUNCTION_APP_NAME= diff --git a/README.md b/README.md index 302aefd..c869950 100644 --- a/README.md +++ b/README.md @@ -176,10 +176,6 @@ Some variables and secrets should be provided to execute the github workflows (p - aoai_base_endpoint - ai_foundry_project_uri -## Related Projects - -- [mlops-promptflow-prompt](https://github.com/microsoft/mlops-promptflow-prompt) - This repository demonstrates how AI Fondry and Prompt flow can be utilized in the Machine Learning Development and Operations (MLOps) process for LLM-based applications (aka LLMOps). It has base examples for inference evaluation using Prompt flow. When combined with [mlops-aisearch-pull](/README.md) for search evaluation, a full end-to-end MLOPs workflow can be achieved. - ## Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a @@ -194,6 +190,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +## Data Collection + +The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described below. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft’s privacy statement. Our privacy statement is located at [https://go.microsoft.com/fwlink/?LinkID=824704](https://go.microsoft.com/fwlink/?LinkID=824704). You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices. + +The enable_telemetry configuration in config/config.yaml enables anonymous telemetry that helps us justify ongoing investment in maintaining and improving this template. Keeping this enabled supports the project and future feature development. To opt out of this telemetry, simply remove enable_telemetry. + ## Trademarks This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft diff --git a/config/config.yaml b/config/config.yaml index 5563358..dc1e47c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -6,6 +6,8 @@ sub_config: managed_identity_client_id: ${MANAGED_IDENTITY_CLIENT_ID} managed_identity_name: ${MANAGED_IDENTITY_NAME} +enable_telemetry: true + # Azure OpenAI configuration. aoai_config: aoai_api_base: ${AOAI_BASE_ENDPOINT} diff --git a/mlops/common/config_utils.py b/mlops/common/config_utils.py index 701797d..b0436cd 100644 --- a/mlops/common/config_utils.py +++ b/mlops/common/config_utils.py @@ -25,6 +25,10 @@ def __init__( def __getattr__(self, __name: str) -> Any: """Get values for top level keys in configuration.""" return self._raw_config[__name] + + def has_key(self, key_name: str) -> bool: + """Check if the configuration has a given top level key.""" + return key_name in self._raw_config def get_flow_config(self, flow_name: str) -> Dict: """Get the pipeline configuration for given flow name and environment.""" diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 12da5aa..0418725 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -5,6 +5,7 @@ import time import argparse import subprocess +import os from azure.identity import DefaultAzureCredential from azure.mgmt.web import WebSiteManagementClient @@ -72,11 +73,16 @@ def _wait_for_functions_ready( params = {"api-version": FUNCTION_API_VERSION} headers = { "Content-Type": APPLICATION_JSON_CONTENT_TYPE, - "User-Agent": "acce1e78-98c3-42d1-b5fd-a5c2c365fbfe/1.0", "Accept": APPLICATION_JSON_CONTENT_TYPE, "Authorization": "Bearer {access_token}".format(access_token=access_token), } + # TELEMETRY CODE BEGINS + # Can be removed or disabled in config.yaml file + if os.getenv("ENABLE_TELEMETRY", "false").lower() == "true": + headers["User-Agent"] = "acce1e78-98c3-42d1-b5fd-a5c2c365fbfe/1.0" + # TELEMETRY CODE ENDS + for function_name in function_names: if slot is None: url = MANAGEMENT_FUNCTION_URL.format( @@ -275,6 +281,10 @@ def main(): # functions_config contains a section with function settings function_app_name = config.functions_config["function_app_name"] + # TELEMETRY SETTING + if config.has_key("enable_telemetry") and config.enable_telemetry: + os.environ["ENABLE_TELEMETRY"] = "true" + credential = DefaultAzureCredential() # generate a slot name for the functions based on the branch name From 6b700dabe6be908dd93a40696af573f1422a3846 Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:31:05 -0800 Subject: [PATCH 30/54] testing acr tokens --- .../workflows/build_validation_workflow.yml | 34 +++++++++++++++---- mlops/common/config_utils.py | 2 +- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index 4615a3b..325c098 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -3,21 +3,41 @@ name: Build Validation Workflow on: workflow_call: - secrets: - ACR_USERNAME: - required: true - ACR_PASSWORD: - required: true + +permissions: + id-token: write + contents: read jobs: + get-acr-token: + name: Get ACR Token + runs-on: ubuntu-latest + outputs: + acr_token: ${{ steps.get_token.outputs.acr_token }} + steps: + - name: Azure login + uses: azure/login@v2 + with: + client-id: ${{ vars.FEDERATED_CLIENT_ID }} + tenant-id: ${{ vars.MANAGED_IDENTITY_TENANT_ID}} + subscription-id: ${{ vars.SUBSCRIPTION_ID }} + + - name: Get ACR Access Token + id: get_token + run: | + TOKEN=$(az acr login --name ${{ vars.ACR_CONTAINER_REGISTRY }} --expose-token --output tsv --query accessToken) + echo "::add-mask::$TOKEN" + echo "acr_token=$TOKEN" >> $GITHUB_OUTPUT + run-unit-tests: runs-on: ubuntu-latest + needs: get-acr-token container: image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: - username: ${{ secrets.ACR_USERNAME }} - password: ${{ secrets.ACR_PASSWORD }} + username: 00000000-0000-0000-0000-000000000000 + password: ${{ needs.get-acr-token.outputs.acr_token }} steps: - name: Checkout uses: actions/checkout@v1 diff --git a/mlops/common/config_utils.py b/mlops/common/config_utils.py index b0436cd..a47d78c 100644 --- a/mlops/common/config_utils.py +++ b/mlops/common/config_utils.py @@ -25,7 +25,7 @@ def __init__( def __getattr__(self, __name: str) -> Any: """Get values for top level keys in configuration.""" return self._raw_config[__name] - + def has_key(self, key_name: str) -> bool: """Check if the configuration has a given top level key.""" return key_name in self._raw_config From 1a30d1d4588c4bc7ad1d942843e6b00ba28a9f8f Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:33:23 -0800 Subject: [PATCH 31/54] typo --- .github/workflows/ai_pull_pr_workflow.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index 5e597dd..c92ab71 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -27,9 +27,6 @@ jobs: build-validation: name: Build Validation uses: ./.github/workflows/build_validation_workflow.yml - secrets: - ACR_USERNAME: ${{ secrets.ACR_USERNAME }} - ACR_PASSWORD: ${{ secrets.ACR_PASSWORD }} deploy-and-evaluate: name: Deployment and Evaluation From be96ff563e838ee859d65deb7c7cedce737883ae Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:37:19 -0800 Subject: [PATCH 32/54] testing acr 2 --- .github/workflows/build_validation_workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index 325c098..df44d2b 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -36,8 +36,8 @@ jobs: image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: - username: 00000000-0000-0000-0000-000000000000 - password: ${{ needs.get-acr-token.outputs.acr_token }} + username: '00000000-0000-0000-0000-000000000000' + password: '${{ needs.get-acr-token.outputs.acr_token }}' steps: - name: Checkout uses: actions/checkout@v1 From 01c2c749f4e947c9b48338282eebc269742be9cf Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:42:19 -0800 Subject: [PATCH 33/54] test 3 --- .github/workflows/ai_pull_pr_workflow.yml | 29 ++++++++++++++-- .../workflows/build_validation_workflow.yml | 33 ++++--------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index c92ab71..ca01b99 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -24,20 +24,43 @@ permissions: contents: read pull-requests: write jobs: + get-acr-token: + name: Get ACR Token + runs-on: ubuntu-latest + outputs: + acr_token: ${{ steps.get_token.outputs.acr_token }} + steps: + - name: Azure login + uses: azure/login@v2 + with: + client-id: ${{ vars.FEDERATED_CLIENT_ID }} + tenant-id: ${{ vars.MANAGED_IDENTITY_TENANT_ID}} + subscription-id: ${{ vars.SUBSCRIPTION_ID }} + + - name: Get ACR Access Token + id: get_token + run: | + TOKEN=$(az acr login --name ${{ vars.ACR_CONTAINER_REGISTRY }} --expose-token --output tsv --query accessToken) + echo "::add-mask::$TOKEN" + echo "acr_token=$TOKEN" >> $GITHUB_OUTPUT + build-validation: name: Build Validation + needs: get-acr-token uses: ./.github/workflows/build_validation_workflow.yml + secrets: + ACR_TOKEN: ${{ needs.get-acr-token.outputs.acr_token }} deploy-and-evaluate: name: Deployment and Evaluation runs-on: ubuntu-latest + needs: [get-acr-token, build-validation] container: image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: - username: ${{ secrets.ACR_USERNAME }} - password: ${{ secrets.ACR_PASSWORD }} - needs: build-validation + username: 00000000-0000-0000-0000-000000000000 + password: ${{ needs.get-acr-token.outputs.acr_token }} steps: - name: Checkout Actions uses: actions/checkout@v1 diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index df44d2b..39dff60 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -3,41 +3,20 @@ name: Build Validation Workflow on: workflow_call: - -permissions: - id-token: write - contents: read + secrets: + ACR_TOKEN: + required: true jobs: - get-acr-token: - name: Get ACR Token - runs-on: ubuntu-latest - outputs: - acr_token: ${{ steps.get_token.outputs.acr_token }} - steps: - - name: Azure login - uses: azure/login@v2 - with: - client-id: ${{ vars.FEDERATED_CLIENT_ID }} - tenant-id: ${{ vars.MANAGED_IDENTITY_TENANT_ID}} - subscription-id: ${{ vars.SUBSCRIPTION_ID }} - - - name: Get ACR Access Token - id: get_token - run: | - TOKEN=$(az acr login --name ${{ vars.ACR_CONTAINER_REGISTRY }} --expose-token --output tsv --query accessToken) - echo "::add-mask::$TOKEN" - echo "acr_token=$TOKEN" >> $GITHUB_OUTPUT - run-unit-tests: runs-on: ubuntu-latest - needs: get-acr-token container: image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: - username: '00000000-0000-0000-0000-000000000000' - password: '${{ needs.get-acr-token.outputs.acr_token }}' + username: 00000000-0000-0000-0000-000000000000 + password: ${{ secrets.ACR_TOKEN }} + steps: steps: - name: Checkout uses: actions/checkout@v1 From a4b428bed3a3ba9e2f266975721d3a08c3d8672c Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:43:57 -0800 Subject: [PATCH 34/54] typo --- .github/workflows/build_validation_workflow.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index 39dff60..9840ad2 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -16,7 +16,6 @@ jobs: credentials: username: 00000000-0000-0000-0000-000000000000 password: ${{ secrets.ACR_TOKEN }} - steps: steps: - name: Checkout uses: actions/checkout@v1 From 188a3a4239bc9d3758701e2d1e477da18dffd63e Mon Sep 17 00:00:00 2001 From: Sergii Baidachnyi Date: Fri, 30 Jan 2026 11:46:50 -0800 Subject: [PATCH 35/54] revert code to password based --- .github/workflows/ai_pull_pr_workflow.yml | 30 ++++--------------- .../workflows/build_validation_workflow.yml | 8 +++-- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index ca01b99..951a742 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -24,43 +24,23 @@ permissions: contents: read pull-requests: write jobs: - get-acr-token: - name: Get ACR Token - runs-on: ubuntu-latest - outputs: - acr_token: ${{ steps.get_token.outputs.acr_token }} - steps: - - name: Azure login - uses: azure/login@v2 - with: - client-id: ${{ vars.FEDERATED_CLIENT_ID }} - tenant-id: ${{ vars.MANAGED_IDENTITY_TENANT_ID}} - subscription-id: ${{ vars.SUBSCRIPTION_ID }} - - - name: Get ACR Access Token - id: get_token - run: | - TOKEN=$(az acr login --name ${{ vars.ACR_CONTAINER_REGISTRY }} --expose-token --output tsv --query accessToken) - echo "::add-mask::$TOKEN" - echo "acr_token=$TOKEN" >> $GITHUB_OUTPUT - build-validation: name: Build Validation - needs: get-acr-token uses: ./.github/workflows/build_validation_workflow.yml secrets: - ACR_TOKEN: ${{ needs.get-acr-token.outputs.acr_token }} + ACR_USERNAME: ${{ secrets.ACR_USERNAME }} + ACR_PASSWORD: ${{ secrets.ACR_PASSWORD }} deploy-and-evaluate: name: Deployment and Evaluation runs-on: ubuntu-latest - needs: [get-acr-token, build-validation] + needs: build-validation container: image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: - username: 00000000-0000-0000-0000-000000000000 - password: ${{ needs.get-acr-token.outputs.acr_token }} + username: ${{ secrets.ACR_USERNAME }} + password: ${{ secrets.ACR_PASSWORD }} steps: - name: Checkout Actions uses: actions/checkout@v1 diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index 9840ad2..4615a3b 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -4,7 +4,9 @@ name: Build Validation Workflow on: workflow_call: secrets: - ACR_TOKEN: + ACR_USERNAME: + required: true + ACR_PASSWORD: required: true jobs: @@ -14,8 +16,8 @@ jobs: image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: - username: 00000000-0000-0000-0000-000000000000 - password: ${{ secrets.ACR_TOKEN }} + username: ${{ secrets.ACR_USERNAME }} + password: ${{ secrets.ACR_PASSWORD }} steps: - name: Checkout uses: actions/checkout@v1 From 84e80c3765540598f7cf86d218626545078e7cdf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:13:40 +0000 Subject: [PATCH 36/54] Initial plan From 55689f0ea310f2908fd8cdcf27c1c8848efa7c5c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:19:11 +0000 Subject: [PATCH 37/54] Update documentation to reflect code changes in PR Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- README.md | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c869950..ddd16dd 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,8 @@ The deployment scripts and github workflows use the git branch name to create a ### Configuration -- Create an `.env` file based on `.env.sample` and populate the appropriate values. -- Modify `config/config.yaml` to meet any changes that have been made within the project. +- Create an `.env` file based on `.env.sample` and populate the appropriate values. The `AI_FOUNDRY_PROJECT_URI` value should follow the format `https://.services.ai.azure.com/api/projects/`. +- Modify `config/config.yaml` to meet any changes that have been made within the project. The `function_app_name` is read from the `FUNCTION_APP_NAME` environment variable. To disable anonymous telemetry, remove the `enable_telemetry` key from `config/config.yaml`. ### Upload test data @@ -124,6 +124,12 @@ The following deployment script will deploy the custom skillset functions to a f python -m mlops.deployment_scripts.deploy_azure_functions ``` +To deploy directly to the main function app without using a deployment slot (as in CI builds), use the `--ignore_slot` flag: + +```sh +python -m mlops.deployment_scripts.deploy_azure_functions --ignore_slot +``` + To test the two skillset functions after they are deployed, run the following script: ```sh @@ -142,7 +148,7 @@ python -m mlops.deployment_scripts.build_indexer ### Perform Search Evaluation -This will perform search evaluation and upload the result to the AI Studio project specified. For more information about evaluation, see the [search evaluation readme](/mlops/evaluation/readme.md). +This will perform search evaluation and upload the result to the Azure AI Foundry project specified by `AI_FOUNDRY_PROJECT_URI`. For more information about evaluation, see the [search evaluation readme](/mlops/evaluation/readme.md). ```sh python -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data.jsonl" --semantic_config my-semantic-config @@ -160,21 +166,32 @@ python -m mlops.deployment_scripts.cleanup_pr This project contains github workflows for PR validation and Continuous Integration (CI). -The PR workflow executes quality checks using flake8 and unit tests. It then deploys the skillset functions to a deployment slot of the function app. Once the functions are deployed and tested, an indexer is deployed and all of the test data is ingested from blob storage. Search evaluation is run and uploaded to an AI Studio project. +The PR workflow executes quality checks using flake8 and unit tests. It then deploys the skillset functions to a deployment slot of the function app. Once the functions are deployed and tested, an indexer is deployed and all of the test data is ingested from blob storage. Search evaluation is run, the results are uploaded to an Azure AI Foundry project, and a summary comment is posted on the pull request. The CI workflow executes a similar workflow to the PR workflow, but the skillset functions are deployed to the main function app, not a deployment slot. In order for the cleanup step of the CI Workflow to work correctly, the development branch from a pull request must not be deleted until the cleanup step has run. -Some variables and secrets should be provided to execute the github workflows (primarily the same ones used in the `.env` file for local execution). - -- azure_credentials -- subscription_id -- resource_group_name -- storage_account_name -- acs_service_name -- aoai_base_endpoint -- ai_foundry_project_uri +Some variables and secrets should be provided to execute the github workflows. The following **repository variables** (`vars.*`) are required: + +- `SUBSCRIPTION_ID` +- `RESOURCE_GROUP_NAME` +- `STORAGE_ACCOUNT_NAME` +- `ACS_SERVICE_NAME` +- `AOAI_BASE_ENDPOINT` +- `AI_FOUNDRY_PROJECT_URI` +- `MANAGED_IDENTITY_CLIENT_ID` +- `MANAGED_IDENTITY_NAME` +- `MANAGED_IDENTITY_TENANT_ID` +- `FEDERATED_CLIENT_ID` — client ID of the Microsoft Entra application used by GitHub Actions to authenticate with Azure via OIDC (see [federated identity setup](./docs/federated_identity_openid_connect.md)) +- `FUNCTION_APP_NAME` — name of the Azure Function App used for custom skills deployment +- `ACR_CONTAINER_REGISTRY` — Azure Container Registry name (without `.azurecr.io`) that hosts the DevOps container image +- `IMAGE_NAME` — name of the container image used in the workflows + +The following **repository secrets** (`secrets.*`) are also required: + +- `ACR_USERNAME` — username for authenticating with the Azure Container Registry +- `ACR_PASSWORD` — password for authenticating with the Azure Container Registry ## Contributing From 640f9fcc870226cb6ec89fe8951bfe73c35d0adb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:29:13 +0000 Subject: [PATCH 38/54] Update docs folder to reflect code changes in PR Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- docs/ai_search_system_identity.md | 14 +++++++-- docs/durable_azurefunction_deployment.md | 8 +++++ docs/federated_identity_openid_connect.md | 36 +++++++++++++---------- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/docs/ai_search_system_identity.md b/docs/ai_search_system_identity.md index ae29878..83e3e84 100644 --- a/docs/ai_search_system_identity.md +++ b/docs/ai_search_system_identity.md @@ -10,6 +10,16 @@ After the system generates the identity, roles can be assigned to it. For this e ![Roles](./images/ai_identity_2.png) -This concludes the instructions, and you may now proceed with building indexers and indexes without keys. For Azure OpenAI components, the key can be removed without requiring any other modifications. In the case of storage, it is necessary to modify the connection string using the following format: +This concludes the instructions, and you may now proceed with building indexers and indexes without keys. For Azure OpenAI components, the key can be removed without requiring any other modifications. In the case of storage, the data source uses a user-assigned managed identity to access blob storage instead of a connection string key. The `documentDataSource.json` configuration sets both the connection string (using the `ResourceId` format below) and an explicit identity reference: -```ResourceId=/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Storage/storageAccounts/{storage_account_name}``` +``` +ResourceId=/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Storage/storageAccounts/{storage_account_name} +``` + +The user-assigned managed identity is specified by its full Azure resource ID in the format: + +``` +/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{managed_identity_name} +``` + +This identity resource ID is automatically populated from the `MANAGED_IDENTITY_NAME` environment variable during deployment by `build_indexer.py`. diff --git a/docs/durable_azurefunction_deployment.md b/docs/durable_azurefunction_deployment.md index b420a69..747ab1f 100644 --- a/docs/durable_azurefunction_deployment.md +++ b/docs/durable_azurefunction_deployment.md @@ -48,6 +48,14 @@ Additional records should be added to App Settings of Azure Functions: Once all the values are provided Azure Functions will be able to communicate with the associated storage account with no explicit connection strings. +## Function Authentication + +The custom skillset functions use `AuthLevel.FUNCTION`, which means callers must supply a valid function key. Function keys are automatically generated by Azure Functions and can be retrieved programmatically using the Azure Management SDK. The `deploy_azure_functions.py` and `run_functions.py` scripts retrieve the function key as part of the deployment and validation process. + +## Deploying Functions + +Functions are deployed using the Azure CLI `az functionapp deployment source config-zip` command with the `--build-remote true` flag, which triggers a remote build on the Azure App Service. This eliminates the need for `ENABLE_ORYX_BUILD` or `SCM_DO_BUILD_DURING_DEPLOYMENT` app settings. + ## Obtaining Credentials in Code Finally, we need to make sure that Azure Functions can use our identity in code to communicate with other services. diff --git a/docs/federated_identity_openid_connect.md b/docs/federated_identity_openid_connect.md index e7a79af..277b4a4 100644 --- a/docs/federated_identity_openid_connect.md +++ b/docs/federated_identity_openid_connect.md @@ -40,11 +40,13 @@ The `Entity Type` is used to define the scope of the OIDC requests from GitHub W ### Step 3: Set GitHub Secrets/Variables -Create GitHub secrets/variables to store Microsoft Entra application details or user-assigned managed identity for your GitHub secrets: +Create GitHub **repository variables** (not secrets) to store Microsoft Entra application details used by the workflows: -* AZURE_CLIENT_ID -* AZURE_TENANT_ID -* AZURE_SUBSCRIPTION_ID +* `FEDERATED_CLIENT_ID` — the Client ID of the Microsoft Entra application registered in Step 1 +* `MANAGED_IDENTITY_TENANT_ID` — the Directory (tenant) ID of the Microsoft Entra application +* `SUBSCRIPTION_ID` — the Azure subscription ID + +> **Note**: These are stored as repository **variables** (`vars.*`) in GitHub, not as secrets, because they are not sensitive credentials. The actual sensitive values (like container registry passwords) are stored as secrets. ## Workflow @@ -56,27 +58,31 @@ To setup a GitHub workflow we need to implement the following steps: 1. Set GitHub workflows permissions so that the token can work with Azure subscription. The workflow requires `id-token: write` and `contents: read` permissions. The `id-token: write` permission allows the workflow to request an OIDC token from GitHub's OIDC provider. 2. The azure/login@v2 action retrieves the OIDC token and exchanges it with Azure Active Directory (Azure AD) to obtain an access token. Azure AD verifies the OIDC token and issues an access token if the token is valid and the federated identity credential configuration matches. -``` +```yaml name: CI Platform Python Workflow on: - push: + push: branches: - - 'main' + - 'development' + +env: + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} + MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} + SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} permissions: - id-token: write - contents: read + id-token: write + contents: read jobs: - build-and-deploy-python: + build-and-deploy-python: runs-on: ubuntu-latest steps: - - name: Azure login + - name: Azure login uses: azure/login@v2 with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - + client-id: ${{ env.FEDERATED_CLIENT_ID }} + tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID }} + subscription-id: ${{ env.SUBSCRIPTION_ID }} ``` From 596efd8a993d3708eabb50ab35e33f9091e75050 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:48:28 +0000 Subject: [PATCH 39/54] Extend README with container workflow, dual client IDs, and deployment slot guidance Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index ddd16dd..773c0c0 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,8 @@ The deployment of **custom skills** poses a unique challenge in data processing Each deployment contains functions that we are using in the indexing process, and we can reference the functions using the slot name in the skillset itself. The deploy_azure_functions.py file contains all needed methods to demonstrate a way to deploy Azure Functions from code. +> **Note on deployment slots**: Deployment slots are only available on **Standard, Premium, and Dedicated App Service plans** — they are not supported on Consumption or Flex Consumption plans. For this reason, the current CI workflows use `--ignore_slot` to deploy directly to the main function app. The code still supports slot-based deployments (the default when `--ignore_slot` is omitted), and engineers who are on a supported plan can take advantage of slots for parallel experimentation. If slots are not available on your plan, each engineer working in parallel should use their **own dedicated Azure Function App** to avoid overwriting each other's deployments during active experiments. + Once all associated APIs, skillsets, indexes, data sources, and indexers are deployed, the SDK can be used to wait until the indexing process is completed. At that point, evaluation can begin. To illustrate the evaluation process, we utilize the Azure AI Evaluation SDK. This tool allows for the execution of complex evaluations either locally or through serverless computing in AI Foundry. Additionally, evaluation results can be published to AI Foundry. The **search_evaluation.py** script provides guidance on setting up the evaluation process using various custom evaluators. It also includes instructions on querying AI Search for data and details on publishing evaluation results to AI Foundry. The following image demonstrates several evaluation results, and it’s possible to note that branch names have been utilized there as well. @@ -77,6 +79,13 @@ The repository illustrates how to operate in a keyless environment without stori - **Azure Functions**: We are using Azure Functions to get access to resources like Azure Blob and Azure OpenAI. Rather than storing keys in the application settings for Azure Functions we utilize user-assigned managed identity. You can find more details visiting this [link](./docs/durable_azurefunction_deployment.md). - **AI Search**: index and data source entities should have access to data (Azure Blob in our case) and Azure OpenAI for data processing. In this template we demonstrate how to use system assigned managed identity avoid storing keys directly. More details can be found [here](./docs/ai_search_system_identity.md). +This template uses **two separate identity client IDs** for different purposes: + +- **`FEDERATED_CLIENT_ID`** — the Client ID of a **Microsoft Entra application** (service principal) registered in Azure AD. It is used exclusively by GitHub Actions to authenticate with Azure via OIDC. GitHub exchanges an OIDC token for a short-lived Azure access token using this identity, so no credentials are stored in GitHub secrets. +- **`MANAGED_IDENTITY_CLIENT_ID`** — the Client ID of a **user-assigned managed identity** that is attached to the Azure Function App and AI Search service. Code running inside the function app uses this identity to access Azure resources (Blob Storage, Azure OpenAI) without storing any keys. + +These two identities serve different trust boundaries: one is for GitHub's CI/CD pipeline, and the other is for the deployed Azure services. In simpler setups it is possible to use a single identity for both purposes, provided the identity has all the required role assignments (Contributor access for deployment, plus resource-level roles for storage and OpenAI). Using separate identities is the recommended approach for least-privilege security. + In addition to providing documentation on the use of managed identities, it is important to note that Azure AI Search may require additional configurations to enable interaction with managed identities. To achieve this, navigate to the **Keys** tab and ensure that either **Role-based access control** or **Both** is selected. ![AI Search access](./docs/images/data_retrieval_8.png) @@ -172,6 +181,14 @@ The CI workflow executes a similar workflow to the PR workflow, but the skillset In order for the cleanup step of the CI Workflow to work correctly, the development branch from a pull request must not be deleted until the cleanup step has run. +### Container-based Workflow Execution + +The PR and CI workflows (and the build validation workflow) run all job steps **inside a Docker container** pulled from an Azure Container Registry (ACR). This container image is pre-built with all Python dependencies, the Azure CLI, and any other tools required by the scripts, ensuring a consistent and fast execution environment. + +The container image is defined in `.buildcontainer/Dockerfile` and is built and pushed to ACR automatically by the `build_devops_container.yml` workflow whenever `requirements.txt` or the Dockerfile changes. The `ACR_CONTAINER_REGISTRY` and `IMAGE_NAME` repository variables control which image is used at runtime. + +**Self-hosted runners**: If you run these workflows on self-hosted machines rather than GitHub-hosted runners, the runner machine must have Docker installed and network access to the ACR. Make sure the runner can authenticate with the registry — the `ACR_USERNAME` and `ACR_PASSWORD` secrets are passed through to the container runtime for this purpose. If your self-hosted runner already has a managed identity or another credential mechanism to access the ACR, you can adapt the workflow to use `az acr login` instead of username/password credentials. + Some variables and secrets should be provided to execute the github workflows. The following **repository variables** (`vars.*`) are required: - `SUBSCRIPTION_ID` From 272602c2f86db64cd05cf53126acdd1b1171c0fb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:56:55 +0000 Subject: [PATCH 40/54] Fix container isolation explanation and FEDERATED_CLIENT_ID description in README Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 773c0c0..5cc346d 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ The repository illustrates how to operate in a keyless environment without stori This template uses **two separate identity client IDs** for different purposes: -- **`FEDERATED_CLIENT_ID`** — the Client ID of a **Microsoft Entra application** (service principal) registered in Azure AD. It is used exclusively by GitHub Actions to authenticate with Azure via OIDC. GitHub exchanges an OIDC token for a short-lived Azure access token using this identity, so no credentials are stored in GitHub secrets. +- **`FEDERATED_CLIENT_ID`** — the Client ID of a **user-assigned managed identity or a Microsoft Entra application** configured in Azure AD. It is used exclusively by GitHub Actions to authenticate with Azure via OIDC. GitHub exchanges an OIDC token for a short-lived Azure access token using this identity, so no credentials are stored in GitHub secrets. Both a managed identity and a service principal (app registration) are supported for this purpose. - **`MANAGED_IDENTITY_CLIENT_ID`** — the Client ID of a **user-assigned managed identity** that is attached to the Azure Function App and AI Search service. Code running inside the function app uses this identity to access Azure resources (Blob Storage, Azure OpenAI) without storing any keys. These two identities serve different trust boundaries: one is for GitHub's CI/CD pipeline, and the other is for the deployed Azure services. In simpler setups it is possible to use a single identity for both purposes, provided the identity has all the required role assignments (Contributor access for deployment, plus resource-level roles for storage and OpenAI). Using separate identities is the recommended approach for least-privilege security. @@ -187,7 +187,9 @@ The PR and CI workflows (and the build validation workflow) run all job steps ** The container image is defined in `.buildcontainer/Dockerfile` and is built and pushed to ACR automatically by the `build_devops_container.yml` workflow whenever `requirements.txt` or the Dockerfile changes. The `ACR_CONTAINER_REGISTRY` and `IMAGE_NAME` repository variables control which image is used at runtime. -**Self-hosted runners**: If you run these workflows on self-hosted machines rather than GitHub-hosted runners, the runner machine must have Docker installed and network access to the ACR. Make sure the runner can authenticate with the registry — the `ACR_USERNAME` and `ACR_PASSWORD` secrets are passed through to the container runtime for this purpose. If your self-hosted runner already has a managed identity or another credential mechanism to access the ACR, you can adapt the workflow to use `az acr login` instead of username/password credentials. +Running jobs inside a container provides an important isolation benefit: without containerization, a workflow running on a self-hosted VM could inadvertently pick up environment variables, Python packages, or other libraries left over from a previous workflow run, leading to hard-to-debug inconsistencies. The container guarantees a clean, reproducible environment on every run. + +**Self-hosted runners**: If you run these workflows on self-hosted machines rather than GitHub-hosted runners, the runner machine must have Docker installed and network access to the ACR. Make sure the runner can authenticate with the registry — the `ACR_USERNAME` and `ACR_PASSWORD` secrets are passed through to the container runtime for this purpose. Some variables and secrets should be provided to execute the github workflows. The following **repository variables** (`vars.*`) are required: From 5d9ad330ed47af7fb01ddbd453e50a47d398ccbd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:33:40 +0000 Subject: [PATCH 41/54] Initial plan From 5b8b385e3a01da68643d061fc1fed284a37c6cf5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:33:52 +0000 Subject: [PATCH 42/54] Initial plan From fa4bcf270f2dffc1a2ffc69a2db6ca95ec6453bd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:34:06 +0000 Subject: [PATCH 43/54] Initial plan From b8ccb229de0f81708381575ca205b40dd683c865 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:34:13 +0000 Subject: [PATCH 44/54] Initial plan From f895e7b2b16944fdd13a2d619548c73b1abc3dba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:34:21 +0000 Subject: [PATCH 45/54] Initial plan From 1eb34531c89ce8a362fb53feb34d421418cdb49d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:35:06 +0000 Subject: [PATCH 46/54] Fix subprocess.run: remove shell=True when using list args Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- mlops/deployment_scripts/deploy_azure_functions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 0418725..930244d 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -165,7 +165,6 @@ def _deploy_functions( "true", ], check=True, - shell=True, ) except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") @@ -233,7 +232,6 @@ def _deploy_functions_withslot( slot_name, ], check=True, - shell=True, ) except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") From 89ed195be2d6ccd460ae0b6e209c8a92b184d139 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:35:28 +0000 Subject: [PATCH 47/54] Fix subprocess.run: remove shell=True when using list-form args Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- mlops/deployment_scripts/deploy_azure_functions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 0418725..930244d 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -165,7 +165,6 @@ def _deploy_functions( "true", ], check=True, - shell=True, ) except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") @@ -233,7 +232,6 @@ def _deploy_functions_withslot( slot_name, ], check=True, - shell=True, ) except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") From 7e11d7d344a16463fa8d0c6341f4cd2ffc58f0f0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:36:20 +0000 Subject: [PATCH 48/54] Remove misleading requests.exceptions.RequestException handlers from subprocess-only try blocks Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- mlops/deployment_scripts/deploy_azure_functions.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 0418725..b375d71 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -170,10 +170,6 @@ def _deploy_functions( except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") raise - except requests.exceptions.RequestException: - print( - "Request has been sent, but no response yet. Checking deployment status in the next step." - ) print("Updating Application settings.") @@ -238,10 +234,6 @@ def _deploy_functions_withslot( except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") raise - except requests.exceptions.RequestException: - print( - "Request has been sent, but no response yet. Checking deployment status in the next step." - ) print("Updating Application settings.") existing_app_settings = app_mgmt_client.web_apps.list_application_settings_slot( From 4d20a1fa553ec3a8346ff3b1df314bf17cb28497 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:36:54 +0000 Subject: [PATCH 49/54] Restore targeted retry predicate and fix misleading log message Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- src/custom_skills/VectorEmbed/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/custom_skills/VectorEmbed/__init__.py b/src/custom_skills/VectorEmbed/__init__.py index 179015e..ac67e8b 100644 --- a/src/custom_skills/VectorEmbed/__init__.py +++ b/src/custom_skills/VectorEmbed/__init__.py @@ -3,15 +3,18 @@ import logging import json import jsonschema +import openai from azure.identity import DefaultAzureCredential from openai import AzureOpenAI from tenacity import ( retry, stop_after_attempt, - wait_random_exponential + wait_random_exponential, + retry_if_exception_type ) REQUEST_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "request_schema.json") +TRANSIENT_OPENAI_ERRORS = (openai.RateLimitError, openai.APIConnectionError, openai.APITimeoutError, openai.InternalServerError) def function_vector_embed(req: func.HttpRequest) -> func.HttpResponse: @@ -71,10 +74,11 @@ def _get_request_schema(): def _log_attempt_number(retry_state): """Log retry attempt.""" row = retry_state.args[0] - print(f"Rate Limit Exceeded! Retry Attempt #: {retry_state.attempt_number} | Chunk: {row}") + print(f"Transient error encountered. Retry Attempt #: {retry_state.attempt_number} | Chunk: {row}") -@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(10), after=_log_attempt_number) +@retry(retry=retry_if_exception_type(TRANSIENT_OPENAI_ERRORS), + wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(10), after=_log_attempt_number) def _generate_embedding(text, aoai_token): """ Generate embeddings for text. From 06f817e8e7baebe67c8e15e5099ee357936fb4cc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 19:38:41 +0000 Subject: [PATCH 50/54] Pin dependencies in custom_skills requirements.txt and fix shell=True vulnerabilities Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- .../deploy_azure_functions.py | 2 -- src/custom_skills/requirements.txt | 28 +++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 0418725..930244d 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -165,7 +165,6 @@ def _deploy_functions( "true", ], check=True, - shell=True, ) except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") @@ -233,7 +232,6 @@ def _deploy_functions_withslot( slot_name, ], check=True, - shell=True, ) except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") diff --git a/src/custom_skills/requirements.txt b/src/custom_skills/requirements.txt index e565ee5..2c35709 100644 --- a/src/custom_skills/requirements.txt +++ b/src/custom_skills/requirements.txt @@ -2,17 +2,17 @@ # The Python Worker is managed by Azure Functions platform # Manually managing azure-functions-worker may cause unexpected issues -azure-core -azure-functions -azure-identity -azure-storage-blob -jsonschema -openai -python-dotenv -tenacity -tiktoken -numexpr -azure-search-documents -langchain-text-splitters -langchain_community -pypdf \ No newline at end of file +azure-core==1.38.2 +azure-functions==1.24.0 +azure-identity==1.25.2 +azure-storage-blob==12.28.0 +jsonschema==4.10.3 +openai==2.24.0 +python-dotenv==1.2.2 +tenacity==9.1.4 +tiktoken==0.12.0 +numexpr==2.14.1 +azure-search-documents==11.6.0 +langchain-text-splitters==1.1.1 +langchain_community==0.4.1 +pypdf==6.7.5 \ No newline at end of file From 004fb809b71959f2d309e51c3ff5c7f460c55b59 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:00:14 +0000 Subject: [PATCH 51/54] Initial plan From a3fd0d337b9d97d7ab24cee6d079ebd3322a3959 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:01:08 +0000 Subject: [PATCH 52/54] Fix E501 linting error: split long TRANSIENT_OPENAI_ERRORS line Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- src/custom_skills/VectorEmbed/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/custom_skills/VectorEmbed/__init__.py b/src/custom_skills/VectorEmbed/__init__.py index ac67e8b..e11225b 100644 --- a/src/custom_skills/VectorEmbed/__init__.py +++ b/src/custom_skills/VectorEmbed/__init__.py @@ -14,7 +14,9 @@ ) REQUEST_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "request_schema.json") -TRANSIENT_OPENAI_ERRORS = (openai.RateLimitError, openai.APIConnectionError, openai.APITimeoutError, openai.InternalServerError) +TRANSIENT_OPENAI_ERRORS = ( + openai.RateLimitError, openai.APIConnectionError, openai.APITimeoutError, openai.InternalServerError +) def function_vector_embed(req: func.HttpRequest) -> func.HttpResponse: From 0b9d6d157d01c67026f3754117a76d1741694369 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:21:32 +0000 Subject: [PATCH 53/54] Initial plan From e22e3e41023bb7b65a4388aba0b73086590fbd6b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 20:23:28 +0000 Subject: [PATCH 54/54] Add FileNotFoundError handling for missing az CLI in deploy functions Co-authored-by: sbaidachni <10055252+sbaidachni@users.noreply.github.com> --- mlops/deployment_scripts/deploy_azure_functions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index bf8a7f8..f7ce1f0 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -166,6 +166,11 @@ def _deploy_functions( ], check=True, ) + except FileNotFoundError: + print( + "Error: 'az' CLI not found. Please install the Azure CLI and ensure it is on your PATH." + ) + raise except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") raise @@ -229,6 +234,11 @@ def _deploy_functions_withslot( ], check=True, ) + except FileNotFoundError: + print( + "Error: 'az' CLI not found. Please install the Azure CLI and ensure it is on your PATH." + ) + raise except subprocess.CalledProcessError as e: print(f"Error deploying function app: {e}") raise