diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index b0cdf7886..040574f76 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -4,6 +4,30 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false + }, + { + "name": "Output datasets", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/testazure/job_with_output_dataset.py", + "console": "integratedTerminal", + "justMyCode": false + }, + { + "name": "Use output datasets", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/testazure/use_run_with_output_dataset.py", + "console": "integratedTerminal", + "justMyCode": false, + }, { "name": "Run example script in AzureML", "type": "python", diff --git a/hi-ml-azure/Makefile b/hi-ml-azure/Makefile index e4172758f..5b8bd221d 100644 --- a/hi-ml-azure/Makefile +++ b/hi-ml-azure/Makefile @@ -86,3 +86,8 @@ test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage example: pip_local echo 'edit src/health/azure/examples/elevate_this.py to reference your compute_cluster_name' cd src/health/azure/examples; python elevate_this.py --azureml --message 'running example from makefile' + +# Create conda environment +env: + conda env remove -n himl-azure + conda env create -f environment.yml diff --git a/hi-ml-azure/environment.yml b/hi-ml-azure/environment.yml index 55a687735..c42e94adb 100644 --- a/hi-ml-azure/environment.yml +++ b/hi-ml-azure/environment.yml @@ -4,3 +4,20 @@ name: himl-azure dependencies: - pip=20.1.1 - python=3.7.3 + - pip: + - azure-ai-ml>=1.1.1 + - azureml-core>=1.42.0 + - azureml-dataset-runtime[fuse]>=1.42.0 + - azureml-mlflow>=1.42.0 + - azure-storage-blob==12.10.0 + - azureml-tensorboard>=1.42.0 + - 
azureml-train-core>=1.42.0 + - conda-merge>=0.1.5 + - mlflow>=1.29.0 + - pandas>=1.3.4 + - param>=1.12 + - protobuf<4.0 + - pysocks>=1.5.8 + - ruamel.yaml>=0.16.12 + - tensorboard>=2.6.0 + - typing-extensions>=4.3.0 diff --git a/hi-ml-azure/src/health_azure/datasets.py b/hi-ml-azure/src/health_azure/datasets.py index 4c2edc359..a2802e32a 100644 --- a/hi-ml-azure/src/health_azure/datasets.py +++ b/hi-ml-azure/src/health_azure/datasets.py @@ -318,6 +318,7 @@ def __init__( use_mounting: Optional[bool] = None, target_folder: Optional[PathOrString] = None, local_folder: Optional[PathOrString] = None, + register_on_job_completion: bool = True, ): """ :param name: The name of the dataset, as it was registered in the AzureML workspace. For output datasets, @@ -338,6 +339,9 @@ def __init__( :param local_folder: The folder on the local machine at which the dataset is available. This is used only for runs outside of AzureML. If this is empty then the target_folder will be used to mount or download the dataset. + :param register_on_job_completion: Only for output datasets when using AML SDK v1: If this flag is True, the + dataset will be registered in the AML portal after the job has completed and visible in the "Data" section. + If this flag is False, the dataset will be visible for the job, but not in the AML portal "Data" section. """ # This class would be a good candidate for a dataclass, but having an explicit constructor makes # documentation tools in the editor work nicer. 
@@ -354,6 +358,7 @@ def __init__( if str(self.target_folder) == ".": raise ValueError("Can't mount or download a dataset to the current working directory.") self.local_folder = Path(local_folder) if local_folder else None + self.register_on_job_completion = register_on_job_completion def to_input_dataset_local( self, @@ -463,8 +468,8 @@ def to_output_dataset(self, workspace: Workspace, dataset_index: int) -> OutputF dataset = OutputFileDatasetConfig( name=_output_dataset_key(index=dataset_index), destination=(datastore, self.name + "/") ) - # TODO: Can we get tags into here too? - dataset = dataset.register_on_complete(name=self.name) + if self.register_on_job_completion: + dataset = dataset.register_on_complete(name=self.name) if self.target_folder: raise ValueError("Output datasets can't have a target_folder set.") use_mounting = True if self.use_mounting is None else self.use_mounting diff --git a/hi-ml-azure/src/health_azure/utils.py b/hi-ml-azure/src/health_azure/utils.py index 859a8f3ea..af6a250bc 100644 --- a/hi-ml-azure/src/health_azure/utils.py +++ b/hi-ml-azure/src/health_azure/utils.py @@ -47,6 +47,7 @@ from azure.ai.ml.entities import Job from azure.ai.ml.entities import Workspace as WorkspaceV2 from azure.ai.ml.entities import Environment as EnvironmentV2 +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential from azure.core.credentials import TokenCredential from azure.core.exceptions import ClientAuthenticationError, ResourceNotFoundError from azure.identity import ( @@ -1917,7 +1918,7 @@ def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential return None -def get_credential() -> Optional[TokenCredential]: +def get_credential() -> TokenCredential: """ Get a credential for authenticating with Azure. There are multiple ways to retrieve a credential. 
If environment variables pertaining to details of a Service Principal are available, those will be used @@ -1926,9 +1927,10 @@ def get_credential() -> Optional[TokenCredential]: device code (which requires the user to visit a link and enter a provided code). If this fails, or if running in Azure, DefaultAzureCredential will be used which iterates through a number of possible authentication methods including identifying an Azure managed identity, cached credentials from VS code, Azure CLI, Powershell etc. - Otherwise returns None. + If none of those works, a ValueError is raised. :return: Any of the aforementioned credentials if available, else None. + :raises ValueError: If no credential can be retrieved. """ service_principal_id = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_ID, allow_missing=True) tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True) @@ -1938,17 +1940,23 @@ def get_credential() -> Optional[TokenCredential]: return _get_legitimate_service_principal_credential(tenant_id, service_principal_id, service_principal_password) try: + # When running in AzureML, this will also try managed identity. cred = _get_legitimate_default_credential() if cred is not None: return cred except ClientAuthenticationError: - cred = _get_legitimate_device_code_credential() - if cred is not None: - return cred - - cred = _get_legitimate_interactive_browser_credential() - if cred is not None: - return cred + if is_running_in_azure_ml(): + # In AzureML, we can try the AzureMLOnBehalfOfCredential credential.
This credential does not need + to be validated (in fact, it raises errors when we try to validate it by getting a token) + return AzureMLOnBehalfOfCredential() + else: + # Outside of AzureML, try any of the interactive authentication methods + cred = _get_legitimate_device_code_credential() + if cred is not None: + return cred + cred = _get_legitimate_interactive_browser_credential() + if cred is not None: + return cred raise ValueError( "Unable to generate and validate a credential. Please see Azure ML documentation" diff --git a/hi-ml-azure/testazure/job_with_output_dataset.py b/hi-ml-azure/testazure/job_with_output_dataset.py new file mode 100644 index 000000000..7770c9791 --- /dev/null +++ b/hi-ml-azure/testazure/job_with_output_dataset.py @@ -0,0 +1,41 @@ +from datetime import datetime +from pathlib import Path +import sys +import uuid + + +src_root = Path(__file__).parents[1] / "src" +sys.path.append(str(src_root)) + +from health_azure import submit_to_azure_if_needed, DatasetConfig + + +def main(): + # Define the output dataset + # The dataset name will also be the name of the folder in the datastore + timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') + suffix = uuid.uuid4().hex[:6] + output_dataset = DatasetConfig( + name=f"joboutputs-{timestamp}-{suffix}", + datastore='workspaceblobstore', + ) + + # Submit the script to Azure if needed + run_info = submit_to_azure_if_needed( + snapshot_root_directory=Path(__file__).parents[1], + output_datasets=[output_dataset], + compute_cluster_name="lite-testing-ds2", + submit_to_azureml=True, + strictly_aml_v1=True, + ) + + output_folder = run_info.output_datasets[0] + print(f"Output folder: {output_folder}") + output_file = output_folder / "output.txt" + output_file.write_text('Hello, world!') + + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py new file mode 100644 index
000000000..7441a3e5f --- /dev/null +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -0,0 +1,77 @@ +import os +from pathlib import Path +import sys + +from azure.ai.ml import MLClient +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential + +src_root = Path(__file__).parents[1] / "src" +sys.path.append(str(src_root)) + +from health_azure.himl import submit_to_azure_if_needed +from health_azure.utils import get_ml_client, get_workspace, get_credential +from azure.storage.blob import BlobServiceClient + + +def main() -> None: + # Check out if we can get the credential + credential = AzureMLOnBehalfOfCredential() + try: + credential.get_token("https://management.azure.com/.default") + except Exception: + print("Failed to get the credential") + uri = os.environ["MLFLOW_TRACKING_URI"] + uri_segments = uri.split("/") + subscription_id = uri_segments[uri_segments.index("subscriptions") + 1] + resource_group_name = uri_segments[uri_segments.index("resourceGroups") + 1] + workspace_name = uri_segments[uri_segments.index("workspaces") + 1] + credential = AzureMLOnBehalfOfCredential() + ml_client = MLClient( + credential=credential, + subscription_id=subscription_id, + resource_group_name=resource_group_name, + workspace_name=workspace_name, + ) + print("Got the client") + + run_id = "sincere_yacht_xjz95gwvq8" + workspace = get_workspace() + run = workspace.get_run(run_id) + if hasattr(run, "output_datasets"): + print(run.output_datasets) + else: + print("No output datasets") + + job = ml_client.jobs.get(run_id) + output_dataset = job.outputs["OUTPUT_0"] + + datastore = ml_client.datastores.get("workspaceblobstore") + print(datastore.account_name) + print(datastore.container_name) + account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" + print(f"{output_dataset.path}") + + blob_client = BlobServiceClient(account_url=account_url, credential=credential) + container_client = 
blob_client.get_container_client(datastore.container_name) + + # List all blobs (files) inside a specific folder (prefix) + paths_parts = output_dataset.path.split("/paths/") + assert len(paths_parts) == 2 + folder_name = paths_parts[1] + blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] + print(f"Files in {folder_name}:") + for blob_name in blob_list: + print(blob_name) + + # Get the client without further authentication. + ml_client2 = get_ml_client() + + +if __name__ == "__main__": + submit_to_azure_if_needed( + snapshot_root_directory=Path(__file__).parents[2], + compute_cluster_name="lite-testing-ds2", + strictly_aml_v1=True, + submit_to_azureml=True, + ) + main()