diff --git a/.buildcontainer/Dockerfile b/.buildcontainer/Dockerfile index 0f9acf9..56d9242 100644 --- a/.buildcontainer/Dockerfile +++ b/.buildcontainer/Dockerfile @@ -1,42 +1,29 @@ -FROM ubuntu:22.04 +FROM mcr.microsoft.com/devcontainers/python:3.12 ARG USERNAME=vscode USER root -RUN apt-get update -y && apt-get install -y sudo wget gnupg software-properties-common curl bash && \ - # Download and install Miniconda - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \ - bash /tmp/miniconda.sh -b -p /opt/miniconda && \ - rm /tmp/miniconda.sh && \ - # Create a non-root user - useradd -m -s /bin/bash $USERNAME && \ - echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME && \ - chmod 0440 /etc/sudoers.d/$USERNAME && \ - # Change ownership of Miniconda to the non-root user - chown -R $USERNAME:$USERNAME /opt/miniconda - -ENV PATH=/opt/miniconda/bin:$PATH - -# Install dependencies -WORKDIR /home/$USERNAME - # Install Azure CLI -RUN sudo apt-get update && sudo apt-get install -y gnupg software-properties-common curl && \ - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash && \ - az config set extension.use_dynamic_install=yes_without_prompt && \ - az extension add -n ml +RUN pip install azure-cli && \ + az config set extension.use_dynamic_install=yes_without_prompt + +# Setup User +USER $USERNAME +WORKDIR /home/$USERNAME COPY requirements.txt . 
-# Create Conda environment and install Python dependencies -RUN conda create -n llm-env python=3.12 pip=23.2 -q -y && \ - conda run -n llm-env pip install -r requirements.txt && \ - conda clean -a -y && \ - conda run -n llm-env pip list +# Create virtual environment and install dependencies +# We use a venv named 'llm-env' to maintain consistency with previous conda setup +RUN python -m venv llm-env && \ + /home/$USERNAME/llm-env/bin/pip install --upgrade pip setuptools wheel && \ + /home/$USERNAME/llm-env/bin/pip install -r requirements.txt -RUN echo "conda activate llm-env" >> /home/$USERNAME/.bashrc +# Configure shell to use the environment +RUN echo "source /home/$USERNAME/llm-env/bin/activate" >> /home/$USERNAME/.bashrc -ENV PATH=/opt/miniconda/envs/llm-env/bin:$PATH +# Add venv to PATH +ENV PATH="/home/$USERNAME/llm-env/bin:$PATH" -CMD ["conda", "run", "-n", "llm-env", "python", "--version"] \ No newline at end of file +CMD ["python", "--version"] \ No newline at end of file diff --git a/.env.sample b/.env.sample index 7d5dce8..2a369f9 100644 --- a/.env.sample +++ b/.env.sample @@ -4,6 +4,7 @@ RESOURCE_GROUP_NAME= STORAGE_ACCOUNT_NAME= ACS_SERVICE_NAME= AOAI_BASE_ENDPOINT= -AI_STUDIO_PROJECT_NAME= +AI_FOUNDRY_PROJECT_URI="https://${AI_FOUNDRY_NAME}.services.ai.azure.com/api/projects/${PROJECT_NAME}" MANAGED_IDENTITY_CLIENT_ID= MANAGED_IDENTITY_NAME= +FUNCTION_APP_NAME= diff --git a/.github/workflows/ai_pull_ci_workflow.yml b/.github/workflows/ai_pull_ci_workflow.yml index c86224c..1bf39da 100644 --- a/.github/workflows/ai_pull_ci_workflow.yml +++ b/.github/workflows/ai_pull_ci_workflow.yml @@ -14,10 +14,12 @@ env: STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCOUNT_NAME }} ACS_SERVICE_NAME: ${{ vars.ACS_SERVICE_NAME }} AOAI_BASE_ENDPOINT: ${{ vars.AOAI_BASE_ENDPOINT }} - AI_STUDIO_PROJECT_NAME: ${{ vars.AI_STUDIO_PROJECT_NAME }} + AI_FOUNDRY_PROJECT_URI: ${{ vars.AI_FOUNDRY_PROJECT_URI }} MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} 
MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} + FUNCTION_APP_NAME: ${{ vars.FUNCTION_APP_NAME }} permissions: id-token: write @@ -27,7 +29,7 @@ jobs: name: Deployment and Evaluation runs-on: ubuntu-latest container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} @@ -39,7 +41,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} diff --git a/.github/workflows/ai_pull_pr_workflow.yml b/.github/workflows/ai_pull_pr_workflow.yml index a0fa2d9..951a742 100644 --- a/.github/workflows/ai_pull_pr_workflow.yml +++ b/.github/workflows/ai_pull_pr_workflow.yml @@ -12,10 +12,12 @@ env: STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCOUNT_NAME }} ACS_SERVICE_NAME: ${{ vars.ACS_SERVICE_NAME }} AOAI_BASE_ENDPOINT: ${{ vars.AOAI_BASE_ENDPOINT }} - AI_STUDIO_PROJECT_NAME: ${{ vars.AI_STUDIO_PROJECT_NAME }} + AI_FOUNDRY_PROJECT_URI: ${{ vars.AI_FOUNDRY_PROJECT_URI }} MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} MANAGED_IDENTITY_NAME: ${{ vars.MANAGED_IDENTITY_NAME }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} + FUNCTION_APP_NAME: ${{ vars.FUNCTION_APP_NAME }} permissions: id-token: write @@ -32,13 +34,13 @@ jobs: deploy-and-evaluate: name: Deployment and Evaluation runs-on: ubuntu-latest + needs: build-validation container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: 
username: ${{ secrets.ACR_USERNAME }} password: ${{ secrets.ACR_PASSWORD }} - needs: build-validation steps: - name: Checkout Actions uses: actions/checkout@v1 @@ -46,35 +48,35 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} - name: Execute Azure Functions Deployment shell: bash run: | - python -u -m mlops.deployment_scripts.deploy_azure_functions + python -u -m mlops.deployment_scripts.deploy_azure_functions --ignore_slot env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} - name: Validate Azure Functions Deployment shell: bash run: | - python -u -m mlops.deployment_scripts.run_functions + python -u -m mlops.deployment_scripts.run_functions --ignore_slot env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} - name: Deploy Indexer shell: bash run: | - python -u -m mlops.deployment_scripts.build_indexer + python -u -m mlops.deployment_scripts.build_indexer --ignore_slot env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} - name: Execute search evaluation shell: bash run: | - python -u -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data.jsonl" --semantic_config my-semantic-config + python -u -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data_sample.jsonl" --semantic_config my-semantic-config env: BUILD_SOURCEBRANCHNAME: ${{ github.head_ref || github.ref_name }} diff --git a/.github/workflows/build_devops_container.yml b/.github/workflows/build_devops_container.yml index a52a287..159e1b9 100644 --- a/.github/workflows/build_devops_container.yml +++ b/.github/workflows/build_devops_container.yml @@ -6,13 +6,14 @@ on: - 'development' paths: - '.github/workflows/build_devops_container.yml' - - 'requirements.txt' + - 'requirements.txt' + - 
'.buildcontainer/Dockerfile' env: - IMAGE_NAME: devops_container_image + IMAGE_NAME: ${{ vars.IMAGE_NAME }} SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} RESOURCE_GROUP_NAME: ${{ vars.RESOURCE_GROUP_NAME }} - MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} ACR_CONTAINER_REGISTRY: ${{ vars.ACR_CONTAINER_REGISTRY }} @@ -34,7 +35,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} diff --git a/.github/workflows/build_validation_workflow.yml b/.github/workflows/build_validation_workflow.yml index bc43eec..4615a3b 100644 --- a/.github/workflows/build_validation_workflow.yml +++ b/.github/workflows/build_validation_workflow.yml @@ -13,7 +13,7 @@ jobs: run-unit-tests: runs-on: ubuntu-latest container: - image: aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} diff --git a/.github/workflows/data_initialization_workflow.yml b/.github/workflows/data_initialization_workflow.yml index 0856e5c..e27fbaf 100644 --- a/.github/workflows/data_initialization_workflow.yml +++ b/.github/workflows/data_initialization_workflow.yml @@ -9,7 +9,7 @@ env: SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} RESOURCE_GROUP_NAME: ${{ vars.RESOURCE_GROUP_NAME }} STORAGE_ACCOUNT_NAME: ${{ vars.STORAGE_ACCOUNT_NAME }} - MANAGED_IDENTITY_CLIENT_ID: ${{ vars.MANAGED_IDENTITY_CLIENT_ID }} + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} permissions: @@ -20,7 +20,7 @@ jobs: upload-data: runs-on: ubuntu-latest container: - image: 
aipullacr.azurecr.io/devops_container_image:latest + image: ${{ vars.ACR_CONTAINER_REGISTRY }}.azurecr.io/${{ vars.IMAGE_NAME }}:latest options: --user root credentials: username: ${{ secrets.ACR_USERNAME }} @@ -32,7 +32,7 @@ jobs: - name: Azure login uses: azure/login@v2 with: - client-id: ${{ env.MANAGED_IDENTITY_CLIENT_ID }} + client-id: ${{ env.FEDERATED_CLIENT_ID }} tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID}} subscription-id: ${{ env.SUBSCRIPTION_ID }} diff --git a/.gitignore b/.gitignore index 5c74b11..713005f 100644 --- a/.gitignore +++ b/.gitignore @@ -417,3 +417,5 @@ local.settings.json # Other .DS_Store + +results diff --git a/README.md b/README.md index d4b5586..5cc346d 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,8 @@ The deployment of **custom skills** poses a unique challenge in data processing Each deployment contains functions that we are using in the indexing process, and we can reference the functions using the slot name in the skillset itself. The deploy_azure_functions.py file contains all needed methods to demonstrate a way to deploy Azure Functions from code. +> **Note on deployment slots**: Deployment slots are only available on **Standard, Premium, and Dedicated App Service plans** — they are not supported on Consumption or Flex Consumption plans. For this reason, the current CI workflows use `--ignore_slot` to deploy directly to the main function app. The code still supports slot-based deployments (the default when `--ignore_slot` is omitted), and engineers who are on a supported plan can take advantage of slots for parallel experimentation. If slots are not available on your plan, each engineer working in parallel should use their **own dedicated Azure Function App** to avoid overwriting each other's deployments during active experiments. + Once all associated APIs, skillsets, indexes, data sources, and indexers are deployed, the SDK can be used to wait until the indexing process is completed. 
At that point, evaluation can begin. To illustrate the evaluation process, we utilize the Azure AI Evaluation SDK. This tool allows for the execution of complex evaluations either locally or through serverless computing in AI Foundry. Additionally, evaluation results can be published to AI Foundry. The **search_evaluation.py** script provides guidance on setting up the evaluation process using various custom evaluators. It also includes instructions on querying AI Search for data and details on publishing evaluation results to AI Foundry. The following image demonstrates several evaluation results, and it’s possible to note that branch names have been utilized there as well. @@ -77,6 +79,13 @@ The repository illustrates how to operate in a keyless environment without stori - **Azure Functions**: We are using Azure Functions to get access to resources like Azure Blob and Azure OpenAI. Rather than storing keys in the application settings for Azure Functions we utilize user-assigned managed identity. You can find more details visiting this [link](./docs/durable_azurefunction_deployment.md). - **AI Search**: index and data source entities should have access to data (Azure Blob in our case) and Azure OpenAI for data processing. In this template we demonstrate how to use system assigned managed identity avoid storing keys directly. More details can be found [here](./docs/ai_search_system_identity.md). +This template uses **two separate identity client IDs** for different purposes: + +- **`FEDERATED_CLIENT_ID`** — the Client ID of a **user-assigned managed identity or a Microsoft Entra application** configured in Azure AD. It is used exclusively by GitHub Actions to authenticate with Azure via OIDC. GitHub exchanges an OIDC token for a short-lived Azure access token using this identity, so no credentials are stored in GitHub secrets. Both a managed identity and a service principal (app registration) are supported for this purpose. 
+- **`MANAGED_IDENTITY_CLIENT_ID`** — the Client ID of a **user-assigned managed identity** that is attached to the Azure Function App and AI Search service. Code running inside the function app uses this identity to access Azure resources (Blob Storage, Azure OpenAI) without storing any keys. + +These two identities serve different trust boundaries: one is for GitHub's CI/CD pipeline, and the other is for the deployed Azure services. In simpler setups it is possible to use a single identity for both purposes, provided the identity has all the required role assignments (Contributor access for deployment, plus resource-level roles for storage and OpenAI). Using separate identities is the recommended approach for least-privilege security. + In addition to providing documentation on the use of managed identities, it is important to note that Azure AI Search may require additional configurations to enable interaction with managed identities. To achieve this, navigate to the **Keys** tab and ensure that either **Role-based access control** or **Both** is selected. ![AI Search access](./docs/images/data_retrieval_8.png) @@ -105,8 +114,8 @@ The deployment scripts and github workflows use the git branch name to create a ### Configuration -- Create an `.env` file based on `.env.sample` and populate the appropriate values. -- Modify `config/config.yaml` to meet any changes that have been made within the project. +- Create an `.env` file based on `.env.sample` and populate the appropriate values. The `AI_FOUNDRY_PROJECT_URI` value should follow the format `https://<AI_FOUNDRY_NAME>.services.ai.azure.com/api/projects/<PROJECT_NAME>`. +- Modify `config/config.yaml` to meet any changes that have been made within the project. The `function_app_name` is read from the `FUNCTION_APP_NAME` environment variable. To disable anonymous telemetry, remove the `enable_telemetry` key from `config/config.yaml`. 
### Upload test data @@ -124,6 +133,12 @@ The following deployment script will deploy the custom skillset functions to a f python -m mlops.deployment_scripts.deploy_azure_functions ``` +To deploy directly to the main function app without using a deployment slot (as in CI builds), use the `--ignore_slot` flag: + +```sh +python -m mlops.deployment_scripts.deploy_azure_functions --ignore_slot +``` + To test the two skillset functions after they are deployed, run the following script: ```sh @@ -142,7 +157,7 @@ python -m mlops.deployment_scripts.build_indexer ### Perform Search Evaluation -This will perform search evaluation and upload the result to the AI Studio project specified. For more information about evaluation, see the [search evaluation readme](/mlops/evaluation/readme.md). +This will perform search evaluation and upload the result to the Azure AI Foundry project specified by `AI_FOUNDRY_PROJECT_URI`. For more information about evaluation, see the [search evaluation readme](/mlops/evaluation/readme.md). ```sh python -m mlops.evaluation.search_evaluation --gt_path "./mlops/evaluation/data/search_evaluation_data.jsonl" --semantic_config my-semantic-config @@ -160,25 +175,42 @@ python -m mlops.deployment_scripts.cleanup_pr This project contains github workflows for PR validation and Continuous Integration (CI). -The PR workflow executes quality checks using flake8 and unit tests. It then deploys the skillset functions to a deployment slot of the function app. Once the functions are deployed and tested, an indexer is deployed and all of the test data is ingested from blob storage. Search evaluation is run and uploaded to an AI Studio project. +The PR workflow executes quality checks using flake8 and unit tests. It then deploys the skillset functions to a deployment slot of the function app. Once the functions are deployed and tested, an indexer is deployed and all of the test data is ingested from blob storage. 
Search evaluation is run, the results are uploaded to an Azure AI Foundry project, and a summary comment is posted on the pull request. The CI workflow executes a similar workflow to the PR workflow, but the skillset functions are deployed to the main function app, not a deployment slot. In order for the cleanup step of the CI Workflow to work correctly, the development branch from a pull request must not be deleted until the cleanup step has run. -Some variables and secrets should be provided to execute the github workflows (primarily the same ones used in the `.env` file for local execution). +### Container-based Workflow Execution -- azure_credentials -- subscription_id -- resource_group_name -- storage_account_name -- acs_service_name -- aoai_base_endpoint -- ai_studio_project_name +The PR and CI workflows (and the build validation workflow) run all job steps **inside a Docker container** pulled from an Azure Container Registry (ACR). This container image is pre-built with all Python dependencies, the Azure CLI, and any other tools required by the scripts, ensuring a consistent and fast execution environment. -## Related Projects +The container image is defined in `.buildcontainer/Dockerfile` and is built and pushed to ACR automatically by the `build_devops_container.yml` workflow whenever `requirements.txt` or the Dockerfile changes. The `ACR_CONTAINER_REGISTRY` and `IMAGE_NAME` repository variables control which image is used at runtime. -- [mlops-promptflow-prompt](https://github.com/microsoft/mlops-promptflow-prompt) - This repository demonstrates how AI Fondry and Prompt flow can be utilized in the Machine Learning Development and Operations (MLOps) process for LLM-based applications (aka LLMOps). It has base examples for inference evaluation using Prompt flow. When combined with [mlops-aisearch-pull](/README.md) for search evaluation, a full end-to-end MLOPs workflow can be achieved. 
+Running jobs inside a container provides an important isolation benefit: without containerization, a workflow running on a self-hosted VM could inadvertently pick up environment variables, Python packages, or other libraries left over from a previous workflow run, leading to hard-to-debug inconsistencies. The container guarantees a clean, reproducible environment on every run. + +**Self-hosted runners**: If you run these workflows on self-hosted machines rather than GitHub-hosted runners, the runner machine must have Docker installed and network access to the ACR. Make sure the runner can authenticate with the registry — the `ACR_USERNAME` and `ACR_PASSWORD` secrets are passed through to the container runtime for this purpose. + +Some variables and secrets should be provided to execute the github workflows. The following **repository variables** (`vars.*`) are required: + +- `SUBSCRIPTION_ID` +- `RESOURCE_GROUP_NAME` +- `STORAGE_ACCOUNT_NAME` +- `ACS_SERVICE_NAME` +- `AOAI_BASE_ENDPOINT` +- `AI_FOUNDRY_PROJECT_URI` +- `MANAGED_IDENTITY_CLIENT_ID` +- `MANAGED_IDENTITY_NAME` +- `MANAGED_IDENTITY_TENANT_ID` +- `FEDERATED_CLIENT_ID` — client ID of the Microsoft Entra application used by GitHub Actions to authenticate with Azure via OIDC (see [federated identity setup](./docs/federated_identity_openid_connect.md)) +- `FUNCTION_APP_NAME` — name of the Azure Function App used for custom skills deployment +- `ACR_CONTAINER_REGISTRY` — Azure Container Registry name (without `.azurecr.io`) that hosts the DevOps container image +- `IMAGE_NAME` — name of the container image used in the workflows + +The following **repository secrets** (`secrets.*`) are also required: + +- `ACR_USERNAME` — username for authenticating with the Azure Container Registry +- `ACR_PASSWORD` — password for authenticating with the Azure Container Registry ## Contributing @@ -194,6 +226,12 @@ This project has adopted the [Microsoft Open Source Code of Conduct](https://ope For more information see the 
[Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +## Data Collection + +The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described below. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft’s privacy statement. Our privacy statement is located at [https://go.microsoft.com/fwlink/?LinkID=824704](https://go.microsoft.com/fwlink/?LinkID=824704). You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices. + +The enable_telemetry configuration in config/config.yaml enables anonymous telemetry that helps us justify ongoing investment in maintaining and improving this template. Keeping this enabled supports the project and future feature development. To opt out of this telemetry, simply remove enable_telemetry. + ## Trademarks This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft diff --git a/config/config.yaml b/config/config.yaml index 8d4c5a7..dc1e47c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -6,6 +6,8 @@ sub_config: managed_identity_client_id: ${MANAGED_IDENTITY_CLIENT_ID} managed_identity_name: ${MANAGED_IDENTITY_NAME} +enable_telemetry: true + # Azure OpenAI configuration. 
aoai_config: aoai_api_base: ${AOAI_BASE_ENDPOINT} @@ -15,7 +17,7 @@ aoai_config: functions_config: function_names: ["Chunk", "Vector_Embed"] - function_app_name: aiskills-pull + function_app_name: ${FUNCTION_APP_NAME} # Azure Cognitive Service config acs_config: diff --git a/docs/ai_search_system_identity.md b/docs/ai_search_system_identity.md index ae29878..83e3e84 100644 --- a/docs/ai_search_system_identity.md +++ b/docs/ai_search_system_identity.md @@ -10,6 +10,16 @@ After the system generates the identity, roles can be assigned to it. For this e ![Roles](./images/ai_identity_2.png) -This concludes the instructions, and you may now proceed with building indexers and indexes without keys. For Azure OpenAI components, the key can be removed without requiring any other modifications. In the case of storage, it is necessary to modify the connection string using the following format: +This concludes the instructions, and you may now proceed with building indexers and indexes without keys. For Azure OpenAI components, the key can be removed without requiring any other modifications. In the case of storage, the data source uses a user-assigned managed identity to access blob storage instead of a connection string key. 
The `documentDataSource.json` configuration sets both the connection string (using the `ResourceId` format below) and an explicit identity reference: -```ResourceId=/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Storage/storageAccounts/{storage_account_name}``` +``` +ResourceId=/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Storage/storageAccounts/{storage_account_name} +``` + +The user-assigned managed identity is specified by its full Azure resource ID in the format: + +``` +/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{managed_identity_name} +``` + +This identity resource ID is automatically populated from the `MANAGED_IDENTITY_NAME` environment variable during deployment by `build_indexer.py`. diff --git a/docs/durable_azurefunction_deployment.md b/docs/durable_azurefunction_deployment.md index b420a69..747ab1f 100644 --- a/docs/durable_azurefunction_deployment.md +++ b/docs/durable_azurefunction_deployment.md @@ -48,6 +48,14 @@ Additional records should be added to App Settings of Azure Functions: Once all the values are provided Azure Functions will be able to communicate with the associated storage account with no explicit connection strings. +## Function Authentication + +The custom skillset functions use `AuthLevel.FUNCTION`, which means callers must supply a valid function key. Function keys are automatically generated by Azure Functions and can be retrieved programmatically using the Azure Management SDK. The `deploy_azure_functions.py` and `run_functions.py` scripts retrieve the function key as part of the deployment and validation process. + +## Deploying Functions + +Functions are deployed using the Azure CLI `az functionapp deployment source config-zip` command with the `--build-remote true` flag, which triggers a remote build on the Azure App Service. 
This eliminates the need for `ENABLE_ORYX_BUILD` or `SCM_DO_BUILD_DURING_DEPLOYMENT` app settings. + ## Obtaining Credentials in Code Finally, we need to make sure that Azure Functions can use our identity in code to communicate with other services. diff --git a/docs/federated_identity_openid_connect.md b/docs/federated_identity_openid_connect.md index e7a79af..277b4a4 100644 --- a/docs/federated_identity_openid_connect.md +++ b/docs/federated_identity_openid_connect.md @@ -40,11 +40,13 @@ The `Entity Type` is used to define the scope of the OIDC requests from GitHub W ### Step 3: Set GitHub Secrets/Variables -Create GitHub secrets/variables to store Microsoft Entra application details or user-assigned managed identity for your GitHub secrets: +Create GitHub **repository variables** (not secrets) to store Microsoft Entra application details used by the workflows: -* AZURE_CLIENT_ID -* AZURE_TENANT_ID -* AZURE_SUBSCRIPTION_ID +* `FEDERATED_CLIENT_ID` — the Client ID of the Microsoft Entra application registered in Step 1 +* `MANAGED_IDENTITY_TENANT_ID` — the Directory (tenant) ID of the Microsoft Entra application +* `SUBSCRIPTION_ID` — the Azure subscription ID + +> **Note**: These are stored as repository **variables** (`vars.*`) in GitHub, not as secrets, because they are not sensitive credentials. The actual sensitive values (like container registry passwords) are stored as secrets. ## Workflow @@ -56,27 +58,31 @@ To setup a GitHub workflow we need to implement the following steps: 1. Set GitHub workflows permissions so that the token can work with Azure subscription. The workflow requires `id-token: write` and `contents: read` permissions. The `id-token: write` permission allows the workflow to request an OIDC token from GitHub's OIDC provider. 2. The azure/login@v2 action retrieves the OIDC token and exchanges it with Azure Active Directory (Azure AD) to obtain an access token. 
Azure AD verifies the OIDC token and issues an access token if the token is valid and the federated identity credential configuration matches. -``` +```yaml name: CI Platform Python Workflow on: - push: + push: branches: - - 'main' + - 'development' + +env: + FEDERATED_CLIENT_ID: ${{ vars.FEDERATED_CLIENT_ID }} + MANAGED_IDENTITY_TENANT_ID: ${{ vars.MANAGED_IDENTITY_TENANT_ID }} + SUBSCRIPTION_ID: ${{ vars.SUBSCRIPTION_ID }} permissions: - id-token: write - contents: read + id-token: write + contents: read jobs: - build-and-deploy-python: + build-and-deploy-python: runs-on: ubuntu-latest steps: - - name: Azure login + - name: Azure login uses: azure/login@v2 with: - client-id: ${{ secrets.AZURE_CLIENT_ID }} - tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - + client-id: ${{ env.FEDERATED_CLIENT_ID }} + tenant-id: ${{ env.MANAGED_IDENTITY_TENANT_ID }} + subscription-id: ${{ env.SUBSCRIPTION_ID }} ``` diff --git a/mlops/acs_config/documentDataSource.json b/mlops/acs_config/documentDataSource.json index 5e4eba9..2a08e79 100644 --- a/mlops/acs_config/documentDataSource.json +++ b/mlops/acs_config/documentDataSource.json @@ -6,6 +6,10 @@ "credentials": { "connectionString": "{connection_string}" }, + "identity": { + "@odata.type": "#Microsoft.Azure.Search.DataUserAssignedIdentity", + "userAssignedIdentity": "{identity_connection_string}" + }, "container": { "name": "{container_name}", "query": null diff --git a/mlops/acs_config/documentIndex.json b/mlops/acs_config/documentIndex.json index 0d8fe13..6e734cf 100644 --- a/mlops/acs_config/documentIndex.json +++ b/mlops/acs_config/documentIndex.json @@ -156,7 +156,11 @@ "azureOpenAIParameters": { "resourceUri": "{openai_api_endpoint}", "deploymentId": "{openai_embedding_model}", - "modelName": "text-embedding-ada-002" + "modelName": "text-embedding-ada-002", + "authIdentity": { + "@odata.type": "#Microsoft.Azure.Search.DataUserAssignedIdentity", + "userAssignedIdentity": 
"{identity_connection_string}" + } } } ] diff --git a/mlops/common/config_utils.py b/mlops/common/config_utils.py index 701797d..a47d78c 100644 --- a/mlops/common/config_utils.py +++ b/mlops/common/config_utils.py @@ -26,6 +26,10 @@ def __getattr__(self, __name: str) -> Any: """Get values for top level keys in configuration.""" return self._raw_config[__name] + def has_key(self, key_name: str) -> bool: + """Check if the configuration has a given top level key.""" + return key_name in self._raw_config + def get_flow_config(self, flow_name: str) -> Dict: """Get the pipeline configuration for given flow name and environment.""" flowconfig_name = f"{flow_name}_{self._environment}" diff --git a/mlops/common/function_utils.py b/mlops/common/function_utils.py index e5cf7a9..7ff2814 100644 --- a/mlops/common/function_utils.py +++ b/mlops/common/function_utils.py @@ -17,8 +17,6 @@ def get_app_settings(config: dict, index_name: str): settings_dict["MANAGED_IDENTITY_CLIENT_ID"] = config.sub_config["managed_identity_client_id"] - settings_dict["ENABLE_ORYX_BUILD"] = "true" - settings_dict["SCM_DO_BUILD_DURING_DEPLOYMENT"] = "true" return settings_dict diff --git a/mlops/deployment_scripts/build_indexer.py b/mlops/deployment_scripts/build_indexer.py index 2518ac4..f67f329 100644 --- a/mlops/deployment_scripts/build_indexer.py +++ b/mlops/deployment_scripts/build_indexer.py @@ -34,6 +34,7 @@ def _create_or_update_search_index( file_name: str, bearer_token: str, api_version: str, + identity_resource_id: str, ) -> None: # Use the REST API, there is a bug in the Search SDK that prevents creating the Vector field correctly @@ -57,6 +58,7 @@ def _create_or_update_search_index( aoai_config["aoai_embedding_model_deployment"], ) index_def = index_def.replace("{openai_embedding_model}", aoai_config["aoai_embedding_model_deployment"]) + index_def = index_def.replace("{identity_connection_string}", identity_resource_id) response = requests.put( url=index_url, data=index_def, params=params, 
headers=headers @@ -108,21 +110,22 @@ def _get_identity_resource( resource_group_name: str, managed_identity_name: str ) -> str: - resource_string = f"/subscriptions/{subscription_id}/resourcegroups/{resource_group_name}" \ + resource_string = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}" \ f"/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{managed_identity_name}" return resource_string def _generate_data_source_connection( - connection_name: str, file_name: str, conn_string: str, container: str + connection_name: str, file_name: str, conn_string: str, identity_conn_string: str, container: str ): with open(file_name) as data_source_file: data_source_def = data_source_file.read() - data_source_def = data_source_def.replace("{conn_string}", conn_string) + data_source_def = data_source_def.replace("{connection_string}", conn_string) data_source_def = data_source_def.replace("{container_name}", container) data_source_def = data_source_def.replace("{name}", connection_name) + data_source_def = data_source_def.replace("{identity_connection_string}", identity_conn_string) data_source_connection = SearchIndexerDataSourceConnection.deserialize( data_source_def, APPLICATION_JSON_CONTENT_TYPE ) @@ -227,6 +230,18 @@ def main(): # Get the token bearer_token = credential.get_token(aisearch_scope).token + conn_string = _get_storage_conn_string( + sub_config["subscription_id"], + sub_config["storage_account_name"], + sub_config["resource_group_name"], + ) + + identity_string = _get_identity_resource( + sub_config["subscription_id"], + sub_config["resource_group_name"], + sub_config["managed_identity_name"] + ) + # Create the full document index _create_or_update_search_index( aoai_config, @@ -235,12 +250,7 @@ def main(): file_name=acs_config["acs_document_index_file"], bearer_token=bearer_token, api_version=acs_config["acs_api_version"], - ) - - conn_string = _get_storage_conn_string( - sub_config["subscription_id"], - 
sub_config["storage_account_name"], - sub_config["resource_group_name"], + identity_resource_id=identity_string ) search_indexer_client = SearchIndexerClient( @@ -257,6 +267,7 @@ def main(): generate_data_source_name(), file_name=acs_config["acs_document_data_source"], conn_string=conn_string, + identity_conn_string=identity_string, container=storage_container, ) search_indexer_client.create_or_update_data_source_connection( diff --git a/mlops/deployment_scripts/deploy_azure_functions.py b/mlops/deployment_scripts/deploy_azure_functions.py index 189dd62..f7ce1f0 100644 --- a/mlops/deployment_scripts/deploy_azure_functions.py +++ b/mlops/deployment_scripts/deploy_azure_functions.py @@ -4,10 +4,12 @@ import shutil import time import argparse +import subprocess +import os from azure.identity import DefaultAzureCredential from azure.mgmt.web import WebSiteManagementClient -from azure.mgmt.web.v2023_01_01.models import Site +from azure.mgmt.web.models import Site from mlops.common.config_utils import MLOpsConfig from mlops.common.naming_utils import generate_slot_name, generate_index_name from mlops.common.function_utils import ( @@ -17,10 +19,6 @@ # Define the path to the Azure function directory APPLICATION_JSON_CONTENT_TYPE = "application/json" FUNCTION_API_VERSION = "2022-03-01" -DEPLOYMENT_APP_URL = "https://{function_app_name}.scm.azurewebsites.net/api/zipdeploy" -DEPLOYMENT_APP_URL_WITH_SLOT = ( - "https://{function_app_name}-{slot}.scm.azurewebsites.net/api/zipdeploy" -) MANAGEMENT_FUNCTION_URL = ( "https://management.azure.com/subscriptions/{subscription_id}" "/resourceGroups/{resource_group}" @@ -79,6 +77,12 @@ def _wait_for_functions_ready( "Authorization": "Bearer {access_token}".format(access_token=access_token), } + # TELEMETRY CODE BEGINS + # Can be removed or disabled in config.yaml file + if os.getenv("ENABLE_TELEMETRY", "false").lower() == "true": + headers["User-Agent"] = "acce1e78-98c3-42d1-b5fd-a5c2c365fbfe/1.0" + # TELEMETRY CODE ENDS + for 
function_name in function_names: if slot is None: url = MANAGEMENT_FUNCTION_URL.format( @@ -126,7 +130,6 @@ def _wait_for_functions_ready( def _deploy_functions( credential: DefaultAzureCredential, - deployment_url: str, subscription_id: str, resource_group_name: str, func_name: str, @@ -136,12 +139,6 @@ def _deploy_functions( credential=credential, subscription_id=subscription_id ) - # Generate access token header - access_token = credential.get_token(MANAGEMENT_SCOPE_URL).token - headers = { - "Content-Type": "application/zip", - "Authorization": "Bearer {access_token}".format(access_token=access_token), - } # Create a zip file of the Custom Skills directory zip_filename = shutil.make_archive( base_name="__customskills", @@ -149,41 +146,34 @@ def _deploy_functions( root_dir=CUSTOM_SKILLS_DIR, ) - # Define the payload for the REST API call - with open(zip_filename, "rb") as f: - payload = f.read() - try: - # Send a POST request to the Azure function app to deploy the zip file - requests.post(deployment_url, headers=headers, data=payload, timeout=60) - except requests.exceptions.RequestException: - print( - "Request has been sent, but no response yet. Checking deployment status in the next step." + print(f"Deploying {zip_filename} to {func_name}...") + subprocess.run( + [ + "az", + "functionapp", + "deployment", + "source", + "config-zip", + "-g", + resource_group_name, + "-n", + func_name, + "--src", + zip_filename, + "--build-remote", + "true", + ], + check=True, ) - - print("Looking for an active deployment.") - # look at existing app for a location - deployment_slots = app_mgmt_client.web_apps.list_deployments( - resource_group_name, func_name - ) - - current_slot = deployment_slots.next() - id = current_slot.id.split("/")[-1] - - print(f"Deployment id: {id}") - status = current_slot.status - - # get_deployment_slot returns 4 in the case of success and 1 for in-progress deployment. 
- while status != 4: - current_slot = app_mgmt_client.web_apps.get_deployment( - resource_group_name, func_name, id + except FileNotFoundError: + print( + "Error: 'az' CLI not found. Please install the Azure CLI and ensure it is on your PATH." ) - status = current_slot.status - if status == 1: - print("Deployment is in progress") - elif status != 4: - raise SystemExit(f"Unknown deployment status {status}") - time.sleep(10) + raise + except subprocess.CalledProcessError as e: + print(f"Error deploying function app: {e}") + raise print("Updating Application settings.") @@ -204,7 +194,6 @@ def _deploy_functions( def _deploy_functions_withslot( credential: DefaultAzureCredential, - deployment_url: str, subscription_id: str, resource_group_name: str, func_name: str, @@ -215,12 +204,6 @@ def _deploy_functions_withslot( credential=credential, subscription_id=subscription_id ) - # Generate access token header - access_token = credential.get_token(MANAGEMENT_SCOPE_URL).token - headers = { - "Content-Type": "application/zip", - "Authorization": "Bearer {access_token}".format(access_token=access_token), - } print(f"slot name is {slot_name}") # Create a zip file of the Custom Skills directory zip_filename = shutil.make_archive( @@ -229,41 +212,36 @@ def _deploy_functions_withslot( root_dir=CUSTOM_SKILLS_DIR, ) - # Define the payload for the REST API call - with open(zip_filename, "rb") as f: - payload = f.read() - try: - # Send a POST request to the Azure function app to deploy the zip file - requests.post(deployment_url, headers=headers, data=payload, timeout=60) - except requests.exceptions.RequestException: - print( - "Request has been sent, but no response yet. Checking deployment status in the next step." 
+ print(f"Deploying {zip_filename} to {func_name} with slot {slot_name}...") + subprocess.run( + [ + "az", + "functionapp", + "deployment", + "source", + "config-zip", + "-g", + resource_group_name, + "-n", + func_name, + "--src", + zip_filename, + "--build-remote", + "true", + "--slot", + slot_name, + ], + check=True, ) - # raise SystemExit(e) - - print("Looking for an active deployment.") - # look at existing app for a location - deployment_slots = app_mgmt_client.web_apps.list_deployments_slot( - resource_group_name, func_name, slot_name - ) - current_slot = deployment_slots.next() - id = current_slot.id.split("/")[-1] - - print(f"Deployment id: {id}") - status = current_slot.status - - # get_deployment_slot returns 4 in the case of success and 1 for in-progress deployment. - while status != 4: - current_slot = app_mgmt_client.web_apps.get_deployment_slot( - resource_group_name, func_name, id, slot_name + except FileNotFoundError: + print( + "Error: 'az' CLI not found. Please install the Azure CLI and ensure it is on your PATH." 
) - status = current_slot.status - if status == 1: - print("Deployment is in progress") - elif status != 4: - raise SystemExit(f"Unknown deployment status {status}") - time.sleep(10) + raise + except subprocess.CalledProcessError as e: + print(f"Error deploying function app: {e}") + raise print("Updating Application settings.") existing_app_settings = app_mgmt_client.web_apps.list_application_settings_slot( @@ -303,6 +281,10 @@ def main(): # functions_config contains a section with function settings function_app_name = config.functions_config["function_app_name"] + # TELEMETRY SETTING + if config.has_key("enable_telemetry") and config.enable_telemetry: + os.environ["ENABLE_TELEMETRY"] = "true" + credential = DefaultAzureCredential() # generate a slot name for the functions based on the branch name @@ -313,23 +295,15 @@ def main(): app_settings = get_app_settings(config, generate_index_name()) # deploying or updating the slot - if slot_name is None: - deployment_url = DEPLOYMENT_APP_URL.format(function_app_name=function_app_name) - else: + if slot_name is not None: print("Creating a deployment slot.") _create_or_update_deployment_slot( credential, subscription_id, resource_group, function_app_name, slot_name ) - deployment_url = DEPLOYMENT_APP_URL_WITH_SLOT.format( - function_app_name=function_app_name, slot=slot_name - ) - - print(f"Deploying to: {deployment_url}") if slot_name is None: _deploy_functions( credential, - deployment_url, subscription_id, resource_group, function_app_name, @@ -338,7 +312,6 @@ def main(): else: _deploy_functions_withslot( credential, - deployment_url, subscription_id, resource_group, function_app_name, diff --git a/mlops/deployment_scripts/upload_data.py b/mlops/deployment_scripts/upload_data.py index c370d0b..7e4e3a0 100644 --- a/mlops/deployment_scripts/upload_data.py +++ b/mlops/deployment_scripts/upload_data.py @@ -6,6 +6,7 @@ from pathlib import Path import argparse +import os from azure.identity import DefaultAzureCredential from 
azure.storage.blob import BlobServiceClient from mlops.common.config_utils import MLOpsConfig @@ -37,10 +38,10 @@ def _upload_ops_files( # construct blob name from file path # everything rather than local_folder - file_subpath = str(file).split(f"{local_folder}/")[1] + file_subpath = file.relative_to(local_folder) # generate a unique name of the file - file_name = file_subpath.replace("/", "_") + file_name = str(file_subpath).replace(os.sep, "_") try: print(f"Ready to copy: {str(file)} to {file_name}.") diff --git a/mlops/evaluation/search_evaluation.py b/mlops/evaluation/search_evaluation.py index a15b0de..ad2228e 100644 --- a/mlops/evaluation/search_evaluation.py +++ b/mlops/evaluation/search_evaluation.py @@ -25,9 +25,7 @@ def main(index_name: str, semantic_config: str, data_path: str): """ experiment_name = generate_experiment_name(index_name) - subscription_id = os.environ.get("SUBSCRIPTION_ID") - resource_group = os.environ.get("RESOURCE_GROUP_NAME") - project_name = os.environ.get("AI_STUDIO_PROJECT_NAME") + project_name = os.environ.get("AI_FOUNDRY_PROJECT_URI") azure_search_service_name = os.environ.get("ACS_SERVICE_NAME") azure_search_endpoint = f"https://{azure_search_service_name}.search.windows.net" @@ -68,7 +66,7 @@ def main(index_name: str, semantic_config: str, data_path: str): } # Create results directory if it does not exist - results_dir = "./results" + results_dir = os.path.join(os.getcwd(), "results") if not os.path.exists(results_dir): os.makedirs(results_dir) @@ -79,12 +77,8 @@ def main(index_name: str, semantic_config: str, data_path: str): target=target, evaluators=evaluators, evaluator_config=evaluators_config, - azure_ai_project={ - "subscription_id": subscription_id, - "resource_group_name": resource_group, - "project_name": project_name, - }, - output_path=f"{results_dir}/{experiment_name}.json", + azure_ai_project=project_name, + output_path=os.path.join(results_dir, f"{experiment_name}.json"), ) print(results["studio_url"]) diff --git 
a/requirements.txt b/requirements.txt index 4f71e83..f707f73 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,16 +5,14 @@ azure-functions>=1.17.0 azure-storage-blob>=12.19.0 azure-mgmt-web>=7.2.0 pyyaml -flake8-docstrings==1.7.0 -flake8==6.1.0 -pep8-naming==0.13.3 -pytest-cov==4.1.0 -pytest-azurepipelines==1.0.5 -pytest-mock==3.12.0 -pytest==7.4.0 +flake8-docstrings>=1.7.0 +flake8>=6.1.0 +pep8-naming>=0.13.3 +pytest-cov>=4.1.0 +pytest-mock>=3.12.0 +pytest>=7.4.0 azure-mgmt-authorization>=4.0.0 -python-dotenv>=0.10.3 -azure-mgmt-search==9.1.0 -azure-mgmt-storage==21.1.0 -azure-search-documents==11.6.0b5 +azure-mgmt-search +azure-mgmt-storage +azure-search-documents azure-ai-evaluation diff --git a/src/custom_skills/VectorEmbed/__init__.py b/src/custom_skills/VectorEmbed/__init__.py index 32e237b..e11225b 100644 --- a/src/custom_skills/VectorEmbed/__init__.py +++ b/src/custom_skills/VectorEmbed/__init__.py @@ -14,6 +14,9 @@ ) REQUEST_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "request_schema.json") +TRANSIENT_OPENAI_ERRORS = ( + openai.RateLimitError, openai.APIConnectionError, openai.APITimeoutError, openai.InternalServerError +) def function_vector_embed(req: func.HttpRequest) -> func.HttpResponse: @@ -73,12 +76,11 @@ def _get_request_schema(): def _log_attempt_number(retry_state): """Log retry attempt.""" row = retry_state.args[0] - print(f"Rate Limit Exceeded! Retry Attempt #: {retry_state.attempt_number} | Chunk: {row}") + print(f"Transient error encountered. Retry Attempt #: {retry_state.attempt_number} | Chunk: {row}") -@retry(retry=retry_if_exception_type(openai.RateLimitError), - wait=wait_random_exponential(min=1, max=60), - stop=stop_after_attempt(10), after=_log_attempt_number) +@retry(retry=retry_if_exception_type(TRANSIENT_OPENAI_ERRORS), + wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(10), after=_log_attempt_number) def _generate_embedding(text, aoai_token): """ Generate embeddings for text. 
diff --git a/src/custom_skills/function_app.py b/src/custom_skills/function_app.py index 2e5f579..6dc44f8 100644 --- a/src/custom_skills/function_app.py +++ b/src/custom_skills/function_app.py @@ -7,7 +7,7 @@ app = func.FunctionApp() -@app.route("Health", auth_level=func.AuthLevel.ANONYMOUS) +@app.route("Health", auth_level=func.AuthLevel.FUNCTION) def health_check(req: func.HttpRequest) -> func.HttpResponse: """Check health of the function.""" version = 1 @@ -15,13 +15,13 @@ def health_check(req: func.HttpRequest) -> func.HttpResponse: return func.HttpResponse(f"This function executed successfully with version {version}.", status_code=200) -@app.route("Chunk", auth_level=func.AuthLevel.ANONYMOUS) +@app.route("Chunk", auth_level=func.AuthLevel.FUNCTION) def chunk(req: func.HttpRequest) -> func.HttpResponse: """Divide document into chunks of text.""" return function_chunk(req) -@app.route("Vector_Embed", auth_level=func.AuthLevel.ANONYMOUS) +@app.route("Vector_Embed", auth_level=func.AuthLevel.FUNCTION) def vector_embed(req: func.HttpRequest) -> func.HttpResponse: """Convert text to vector embedding.""" return function_vector_embed(req) diff --git a/src/custom_skills/requirements.txt b/src/custom_skills/requirements.txt index b24a0a0..2c35709 100644 --- a/src/custom_skills/requirements.txt +++ b/src/custom_skills/requirements.txt @@ -2,17 +2,17 @@ # The Python Worker is managed by Azure Functions platform # Manually managing azure-functions-worker may cause unexpected issues -azure-core==1.29.5 -azure-functions==1.17.0 -azure-identity==1.16.1 -azure-storage-blob==12.19.0 -jsonschema==4.19.2 -openai -python-dotenv==1.0.0 -tenacity==8.2.3 -tiktoken==0.5.1 -numexpr==2.8.7 -azure-search-documents==11.6.0b5 -langchain-text-splitters -langchain_community -pypdf \ No newline at end of file +azure-core==1.38.2 +azure-functions==1.24.0 +azure-identity==1.25.2 +azure-storage-blob==12.28.0 +jsonschema==4.10.3 +openai==2.24.0 +python-dotenv==1.2.2 +tenacity==9.1.4 
+tiktoken==0.12.0 +numexpr==2.14.1 +azure-search-documents==11.6.0 +langchain-text-splitters==1.1.1 +langchain_community==0.4.1 +pypdf==6.7.5 \ No newline at end of file