From 49a6633899761ddfe26bd78b054952c2cf980f61 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 16:54:38 +0000 Subject: [PATCH 01/13] env creation --- hi-ml-azure/Makefile | 5 +++++ hi-ml-azure/environment.yml | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/hi-ml-azure/Makefile b/hi-ml-azure/Makefile index e4172758f..5b8bd221d 100644 --- a/hi-ml-azure/Makefile +++ b/hi-ml-azure/Makefile @@ -86,3 +86,8 @@ test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage example: pip_local echo 'edit src/health/azure/examples/elevate_this.py to reference your compute_cluster_name' cd src/health/azure/examples; python elevate_this.py --azureml --message 'running example from makefile' + +# Create conda environment +env: + conda env remove -n himl-azure + conda env create -f environment.yml diff --git a/hi-ml-azure/environment.yml b/hi-ml-azure/environment.yml index 55a687735..c42e94adb 100644 --- a/hi-ml-azure/environment.yml +++ b/hi-ml-azure/environment.yml @@ -4,3 +4,20 @@ name: himl-azure dependencies: - pip=20.1.1 - python=3.7.3 + - pip: + - azure-ai-ml>=1.1.1 + - azureml-core>=1.42.0 + - azureml-dataset-runtime[fuse]>=1.42.0 + - azureml-mlflow>=1.42.0 + - azure-storage-blob==12.10.0 + - azureml-tensorboard>=1.42.0 + - azureml-train-core>=1.42.0 + - conda-merge>=0.1.5 + - mlflow>=1.29.0 + - pandas>=1.3.4 + - param>=1.12 + - protobuf<4.0 + - pysocks>=1.5.8 + - ruamel.yaml>=0.16.12 + - tensorboard>=2.6.0 + - typing-extensions>=4.3.0 From 0058865237c407386ba09e2ce994d7d49125b2cb Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 16:54:53 +0000 Subject: [PATCH 02/13] launch --- hi-ml-azure/.vscode/launch.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index b0cdf7886..f271799cc 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -4,6 +4,14 @@ // For more information, visit: 
https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false + }, { "name": "Run example script in AzureML", "type": "python", From 6cfae7b1f4a1251b07cf77a530996ecf1ff35817 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 20:56:11 +0000 Subject: [PATCH 03/13] register option --- hi-ml-azure/src/health_azure/datasets.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hi-ml-azure/src/health_azure/datasets.py b/hi-ml-azure/src/health_azure/datasets.py index 4c2edc359..6b55ccc3b 100644 --- a/hi-ml-azure/src/health_azure/datasets.py +++ b/hi-ml-azure/src/health_azure/datasets.py @@ -318,6 +318,7 @@ def __init__( use_mounting: Optional[bool] = None, target_folder: Optional[PathOrString] = None, local_folder: Optional[PathOrString] = None, + register_on_job_completion: bool = True, ): """ :param name: The name of the dataset, as it was registered in the AzureML workspace. For output datasets, @@ -338,6 +339,9 @@ def __init__( :param local_folder: The folder on the local machine at which the dataset is available. This is used only for runs outside of AzureML. If this is empty then the target_folder will be used to mount or download the dataset. + :param register_on_job_completion: Only for output datasets: If this flag is True, the dataset will be + registered in the AML portal after the job has completed and visible in the "Data" section. + If this flag is False, the dataset will be visible for the job, but not in the AML portal "Data" section. """ # This class would be a good candidate for a dataclass, but having an explicit constructor makes # documentation tools in the editor work nicer. 
@@ -354,6 +358,7 @@ def __init__( if str(self.target_folder) == ".": raise ValueError("Can't mount or download a dataset to the current working directory.") self.local_folder = Path(local_folder) if local_folder else None + self.register_on_job_completion = register_on_job_completion def to_input_dataset_local( self, @@ -463,8 +468,8 @@ def to_output_dataset(self, workspace: Workspace, dataset_index: int) -> OutputF dataset = OutputFileDatasetConfig( name=_output_dataset_key(index=dataset_index), destination=(datastore, self.name + "/") ) - # TODO: Can we get tags into here too? - dataset = dataset.register_on_complete(name=self.name) + if self.register_on_job_completion: + dataset = dataset.register_on_complete(name=self.name) if self.target_folder: raise ValueError("Output datasets can't have a target_folder set.") use_mounting = True if self.use_mounting is None else self.use_mounting From 749f120b7eb9b4b136d35713723d17477bc9c774 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 21:00:34 +0000 Subject: [PATCH 04/13] test script --- hi-ml-azure/.vscode/launch.json | 4 +-- hi-ml-azure/testazure/output_datasets.py | 38 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 hi-ml-azure/testazure/output_datasets.py diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index f271799cc..1e2245e98 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -5,10 +5,10 @@ "version": "0.2.0", "configurations": [ { - "name": "Python: Current File", + "name": "Output datasets", "type": "python", "request": "launch", - "program": "${file}", + "program": "${workspaceFolder}/testazure/output_datasets.py", "console": "integratedTerminal", "justMyCode": false }, diff --git a/hi-ml-azure/testazure/output_datasets.py b/hi-ml-azure/testazure/output_datasets.py new file mode 100644 index 000000000..5269ead24 --- /dev/null +++ b/hi-ml-azure/testazure/output_datasets.py @@ 
-0,0 +1,38 @@ +from datetime import datetime +from pathlib import Path +import sys + + +src_root = Path(__file__).parents[1] / "src" +sys.path.append(str(src_root)) + +from health_azure import submit_to_azure_if_needed, DatasetConfig + + +def main(): + # Define the output dataset + output_dataset = DatasetConfig( + name='output_dataset', + datastore='workspaceblobstore', + # path=f"outputs_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}" # Plus a random part + ) + + # Submit the script to Azure if needed + run_info = submit_to_azure_if_needed( + snapshot_root_directory=Path(__file__).parents[1], + output_datasets=[output_dataset], + compute_cluster_name="lite-testing-ds2", + submit_to_azureml=True, + strictly_aml_v1=True, + ) + + output_folder = run_info.output_datasets[0] + print(f"Output folder: {output_folder}") + output_file = output_folder / "output.txt" + output_file.write_text('Hello, world!') + + print("Done!") + + +if __name__ == "__main__": + main() From d5e194f4a7723c3540b140691ad06813cf7013c6 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 21:26:25 +0000 Subject: [PATCH 05/13] fix suffix --- hi-ml-azure/testazure/output_datasets.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hi-ml-azure/testazure/output_datasets.py b/hi-ml-azure/testazure/output_datasets.py index 5269ead24..7770c9791 100644 --- a/hi-ml-azure/testazure/output_datasets.py +++ b/hi-ml-azure/testazure/output_datasets.py @@ -1,6 +1,7 @@ from datetime import datetime from pathlib import Path import sys +import uuid src_root = Path(__file__).parents[1] / "src" @@ -12,9 +13,11 @@ def main(): # Define the output dataset output_dataset = DatasetConfig( - name='output_dataset', + # The dataset name will also be the name of the folder in the datastore + timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') + suffix = uuid.uuid4().hex[:6], + name=f"joboutputs-{timestamp}-{suffix}", datastore='workspaceblobstore', - # 
path=f"outputs_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}" # Plus a random part ) # Submit the script to Azure if needed From 86f8cea685ffcc8c6e93f4a8e71f4115e3b976de Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 22:11:52 +0000 Subject: [PATCH 06/13] rename --- .../testazure/{output_datasets.py => job_with_output_dataset.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename hi-ml-azure/testazure/{output_datasets.py => job_with_output_dataset.py} (100%) diff --git a/hi-ml-azure/testazure/output_datasets.py b/hi-ml-azure/testazure/job_with_output_dataset.py similarity index 100% rename from hi-ml-azure/testazure/output_datasets.py rename to hi-ml-azure/testazure/job_with_output_dataset.py From c6ecb3ee3401b239e8ff10514b1994f2284250ef Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 22:12:01 +0000 Subject: [PATCH 07/13] wip --- hi-ml-azure/.vscode/launch.json | 10 ++++- .../testazure/use_run_with_output_dataset.py | 41 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 hi-ml-azure/testazure/use_run_with_output_dataset.py diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index 1e2245e98..d32c40951 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -4,11 +4,19 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false + }, { "name": "Output datasets", "type": "python", "request": "launch", - "program": "${workspaceFolder}/testazure/output_datasets.py", + "program": "${workspaceFolder}/testazure/job_with_output_dataset.py", "console": "integratedTerminal", "justMyCode": false }, diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py 
b/hi-ml-azure/testazure/use_run_with_output_dataset.py new file mode 100644 index 000000000..dc559260f --- /dev/null +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -0,0 +1,41 @@ +from pathlib import Path +import sys + +src_root = Path(__file__).parents[1] / "src" +sys.path.append(str(src_root)) + +from health_azure.utils import get_ml_client, get_workspace + +run_id = "sincere_yacht_xjz95gwvq8" +workspace = get_workspace() +run = workspace.get_run(run_id) + + +ml_client = get_ml_client() +job = ml_client.jobs.get(run_id) +output_dataset = job.outputs["OUTPUT_0"] + +from azure.ai.ml.entities import Data +from azure.ai.ml.constants import AssetTypes +data_type = AssetTypes.URI_FILE + +data = Data(path=output_dataset.path) +# data.mount(ml_client) + +datastore = ml_client.datastores.get("workspaceblobstore") +print(datastore.account_name) +print(datastore.container_name) +account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" +print(f"{output_dataset.path}") +"azureml://subscriptions/a85ceddd-892e-4637-ae4b-67d15ddf5f2b/resourcegroups/health-ml/workspaces/hi-ml/datastores/workspaceblobstore/paths/output_dataset/" + +from azure.storage.blob import BlobServiceClient +from azure.identity import DefaultAzureCredential +blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) +container_client = blob_client.get_container_client(datastore.container_name) + +# List all blobs (files) inside a specific folder (prefix) +paths_parts = output_dataset.path.split("/paths/") +assert len(paths_parts) == 2 +folder_name = paths_parts[1] +blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] From 86b4514a971092e24bf3c07b267c2c309aadd0fc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:25:33 +0000 Subject: [PATCH 08/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci --- hi-ml-azure/testazure/use_run_with_output_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index dc559260f..e19b21bed 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -17,6 +17,7 @@ from azure.ai.ml.entities import Data from azure.ai.ml.constants import AssetTypes + data_type = AssetTypes.URI_FILE data = Data(path=output_dataset.path) @@ -31,6 +32,7 @@ from azure.storage.blob import BlobServiceClient from azure.identity import DefaultAzureCredential + blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) container_client = blob_client.get_container_client(datastore.container_name) From e84ee27ce5281835fefd1ca3896f87ad87e65561 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 04:26:37 -0700 Subject: [PATCH 09/13] cleanup --- hi-ml-azure/testazure/use_run_with_output_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index dc559260f..bacfae81c 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -17,6 +17,7 @@ from azure.ai.ml.entities import Data from azure.ai.ml.constants import AssetTypes + data_type = AssetTypes.URI_FILE data = Data(path=output_dataset.path) @@ -27,10 +28,10 @@ print(datastore.container_name) account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" print(f"{output_dataset.path}") -"azureml://subscriptions/a85ceddd-892e-4637-ae4b-67d15ddf5f2b/resourcegroups/health-ml/workspaces/hi-ml/datastores/workspaceblobstore/paths/output_dataset/" from azure.storage.blob import BlobServiceClient from azure.identity 
import DefaultAzureCredential + blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) container_client = blob_client.get_container_client(datastore.container_name) From a7f9dab310b36e83e245d3f3bfc38ad8b16f71de Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 04:28:22 -0700 Subject: [PATCH 10/13] doc --- hi-ml-azure/src/health_azure/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hi-ml-azure/src/health_azure/datasets.py b/hi-ml-azure/src/health_azure/datasets.py index 6b55ccc3b..a2802e32a 100644 --- a/hi-ml-azure/src/health_azure/datasets.py +++ b/hi-ml-azure/src/health_azure/datasets.py @@ -339,8 +339,8 @@ def __init__( :param local_folder: The folder on the local machine at which the dataset is available. This is used only for runs outside of AzureML. If this is empty then the target_folder will be used to mount or download the dataset. - :param register_on_job_completion: Only for output datasets: If this flag is True, the dataset will be - registered in the AML portal after the job has completed and visible in the "Data" section. + :param register_on_job_completion: Only for output datasets when using AML SDK v1: If this flag is True, the + dataset will be registered in the AML portal after the job has completed and visible in the "Data" section. If this flag is False, the dataset will be visible for the job, but not in the AML portal "Data" section. 
""" # This class would be a good candidate for a dataclass, but having an explicit constructor makes From 029d0fcb99b78fcaa607081e872b424df4c8b3f4 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 09:04:59 -0700 Subject: [PATCH 11/13] on-behalf --- hi-ml-azure/src/health_azure/utils.py | 41 +++++++-- .../testazure/use_run_with_output_dataset.py | 88 +++++++++++++------ 2 files changed, 92 insertions(+), 37 deletions(-) diff --git a/hi-ml-azure/src/health_azure/utils.py b/hi-ml-azure/src/health_azure/utils.py index 859a8f3ea..81e5f397f 100644 --- a/hi-ml-azure/src/health_azure/utils.py +++ b/hi-ml-azure/src/health_azure/utils.py @@ -47,6 +47,7 @@ from azure.ai.ml.entities import Job from azure.ai.ml.entities import Workspace as WorkspaceV2 from azure.ai.ml.entities import Environment as EnvironmentV2 +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential from azure.core.credentials import TokenCredential from azure.core.exceptions import ClientAuthenticationError, ResourceNotFoundError from azure.identity import ( @@ -1902,6 +1903,20 @@ def _get_legitimate_default_credential() -> Optional[TokenCredential]: return cred +def _get_legitimate_azureml_credential() -> Optional[TokenCredential]: + """ + Create a AzureMLOnBehalfOfCredential for interacting with Azure resources and validates it. + + :return: A valid Azure credential. + """ + cred = AzureMLOnBehalfOfCredential() + try: + _validate_credential(cred) + return cred + except Exception: + return None + + def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential]: """ Create an InteractiveBrowser credential for interacting with Azure resources. If the credential can't be @@ -1917,7 +1932,7 @@ def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential return None -def get_credential() -> Optional[TokenCredential]: +def get_credential() -> TokenCredential: """ Get a credential for authenticating with Azure. 
There are multiple ways to retrieve a credential. If environment variables pertaining to details of a Service Principal are available, those will be used @@ -1926,9 +1941,10 @@ device code (which requires the user to visit a link and enter a provided code). If this fails, or if running in Azure, DefaultAzureCredential will be used which iterates through a number of possible authentication methods including identifying an Azure managed identity, cached credentials from VS code, Azure CLI, Powershell etc. - Otherwise returns None. + If none of those works, a ValueError is raised. :return: Any of the aforementioned credentials if available, else None. + :raises ValueError: If no credential can be retrieved. """ service_principal_id = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_ID, allow_missing=True) tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True) @@ -1938,17 +1954,24 @@ return _get_legitimate_service_principal_credential(tenant_id, service_principal_id, service_principal_password) try: + # When running in AzureML, this will also try managed identity. cred = _get_legitimate_default_credential() if cred is not None: return cred except ClientAuthenticationError: - cred = _get_legitimate_device_code_credential() - if cred is not None: - return cred - - cred = _get_legitimate_interactive_browser_credential() - if cred is not None: - return cred + if is_running_in_azure_ml(): + # In AzureML, we can try the OnBehalfOf credential + cred = _get_legitimate_azureml_credential() + if cred is not None: + return cred + else: + # Outside of AzureML, try any of the interactive authentication methods + cred = _get_legitimate_device_code_credential() + if cred is not None: + return cred + cred = _get_legitimate_interactive_browser_credential() + if cred is not None: + return cred raise ValueError( "Unable to generate and validate a credential. 
Please see Azure ML documentation" diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index bacfae81c..c8072ced8 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -1,42 +1,74 @@ +import os from pathlib import Path import sys +from azure.ai.ml import MLClient +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential + src_root = Path(__file__).parents[1] / "src" sys.path.append(str(src_root)) -from health_azure.utils import get_ml_client, get_workspace - -run_id = "sincere_yacht_xjz95gwvq8" -workspace = get_workspace() -run = workspace.get_run(run_id) - +from health_azure.himl import submit_to_azure_if_needed +from health_azure.utils import get_ml_client, get_workspace, get_credential +from azure.storage.blob import BlobServiceClient -ml_client = get_ml_client() -job = ml_client.jobs.get(run_id) -output_dataset = job.outputs["OUTPUT_0"] -from azure.ai.ml.entities import Data -from azure.ai.ml.constants import AssetTypes +def main() -> None: + # Check out if we can get the credential + credential = AzureMLOnBehalfOfCredential() + try: + credential.get_token("https://management.azure.com/.default") + except Exception: + print("Failed to get the credential") + uri = os.environ["MLFLOW_TRACKING_URI"] + uri_segments = uri.split("/") + subscription_id = uri_segments[uri_segments.index("subscriptions") + 1] + resource_group_name = uri_segments[uri_segments.index("resourceGroups") + 1] + workspace_name = uri_segments[uri_segments.index("workspaces") + 1] + credential = AzureMLOnBehalfOfCredential() + client = MLClient( + credential=credential, + subscription_id=subscription_id, + resource_group_name=resource_group_name, + workspace_name=workspace_name, + ) + print("Got the client") + run_id = "sincere_yacht_xjz95gwvq8" + workspace = get_workspace() + run = workspace.get_run(run_id) + if hasattr(run, "output_datasets"): + 
print(run.output_datasets) + else: + print("No output datasets") -data_type = AssetTypes.URI_FILE + ml_client = get_ml_client() + job = ml_client.jobs.get(run_id) + output_dataset = job.outputs["OUTPUT_0"] -data = Data(path=output_dataset.path) -# data.mount(ml_client) + datastore = ml_client.datastores.get("workspaceblobstore") + print(datastore.account_name) + print(datastore.container_name) + account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" + print(f"{output_dataset.path}") -datastore = ml_client.datastores.get("workspaceblobstore") -print(datastore.account_name) -print(datastore.container_name) -account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" -print(f"{output_dataset.path}") + blob_client = BlobServiceClient(account_url=account_url, credential=get_credential()) + container_client = blob_client.get_container_client(datastore.container_name) -from azure.storage.blob import BlobServiceClient -from azure.identity import DefaultAzureCredential + # List all blobs (files) inside a specific folder (prefix) + paths_parts = output_dataset.path.split("/paths/") + assert len(paths_parts) == 2 + folder_name = paths_parts[1] + blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] + print(f"Files in {folder_name}:") + for blob_name in blob_list: + print(blob_name) -blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) -container_client = blob_client.get_container_client(datastore.container_name) -# List all blobs (files) inside a specific folder (prefix) -paths_parts = output_dataset.path.split("/paths/") -assert len(paths_parts) == 2 -folder_name = paths_parts[1] -blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] +if __name__ == "__main__": + submit_to_azure_if_needed( + snapshot_root_directory=Path(__file__).parents[2], + compute_cluster_name="ds2-with-id", + 
strictly_aml_v1=True, + submit_to_azureml=True, + ) + main() From f049798383587b802d54b6dfb426c931548f265e Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 09:05:44 -0700 Subject: [PATCH 12/13] wip --- hi-ml-azure/.vscode/launch.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index d32c40951..040574f76 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -20,6 +20,14 @@ "console": "integratedTerminal", "justMyCode": false }, + { + "name": "Use output datasets", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/testazure/use_run_with_output_dataset.py", + "console": "integratedTerminal", + "justMyCode": false, + }, { "name": "Run example script in AzureML", "type": "python", From 645bb1d3084eb4163e5065f6f4944be4f9245781 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 09:34:32 -0700 Subject: [PATCH 13/13] wip --- hi-ml-azure/src/health_azure/utils.py | 21 +++---------------- .../testazure/use_run_with_output_dataset.py | 11 ++++++---- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/hi-ml-azure/src/health_azure/utils.py b/hi-ml-azure/src/health_azure/utils.py index 81e5f397f..af6a250bc 100644 --- a/hi-ml-azure/src/health_azure/utils.py +++ b/hi-ml-azure/src/health_azure/utils.py @@ -1903,20 +1903,6 @@ def _get_legitimate_default_credential() -> Optional[TokenCredential]: return cred -def _get_legitimate_azureml_credential() -> Optional[TokenCredential]: - """ - Create a AzureMLOnBehalfOfCredential for interacting with Azure resources and validates it. - - :return: A valid Azure credential. 
- """ - cred = AzureMLOnBehalfOfCredential() - try: - _validate_credential(cred) - return cred - except Exception: - return None - - def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential]: """ Create an InteractiveBrowser credential for interacting with Azure resources. If the credential can't be @@ -1960,10 +1946,9 @@ def get_credential() -> TokenCredential: return cred except ClientAuthenticationError: if is_running_in_azure_ml(): - # In AzureML, we can try the OnBehalfOf credential - cred = _get_legitimate_azureml_credential() - if cred is not None: - return cred + # In AzureML, we can try the AzureMLOnBehalfOfCredential credential. This credential does not need + # to be validated (in fact, it raises errors when we try to validate it by getting a token) + return AzureMLOnBehalfOfCredential() else: # Outside of AzureML, try any of the interactive authentication methods cred = _get_legitimate_device_code_credential() diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index c8072ced8..7441a3e5f 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -26,13 +26,14 @@ def main() -> None: resource_group_name = uri_segments[uri_segments.index("resourceGroups") + 1] workspace_name = uri_segments[uri_segments.index("workspaces") + 1] credential = AzureMLOnBehalfOfCredential() - client = MLClient( + ml_client = MLClient( credential=credential, subscription_id=subscription_id, resource_group_name=resource_group_name, workspace_name=workspace_name, ) print("Got the client") + run_id = "sincere_yacht_xjz95gwvq8" workspace = get_workspace() run = workspace.get_run(run_id) @@ -41,7 +42,6 @@ def main() -> None: else: print("No output datasets") - ml_client = get_ml_client() job = ml_client.jobs.get(run_id) output_dataset = job.outputs["OUTPUT_0"] @@ -51,7 +51,7 @@ def main() -> None: account_url = 
f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" print(f"{output_dataset.path}") - blob_client = BlobServiceClient(account_url=account_url, credential=get_credential()) + blob_client = BlobServiceClient(account_url=account_url, credential=credential) container_client = blob_client.get_container_client(datastore.container_name) # List all blobs (files) inside a specific folder (prefix) @@ -63,11 +63,14 @@ def main() -> None: for blob_name in blob_list: print(blob_name) + # Get the client without further authentication. + ml_client2 = get_ml_client() + if __name__ == "__main__": submit_to_azure_if_needed( snapshot_root_directory=Path(__file__).parents[2], - compute_cluster_name="ds2-with-id", + compute_cluster_name="lite-testing-ds2", strictly_aml_v1=True, submit_to_azureml=True, )