From 49a6633899761ddfe26bd78b054952c2cf980f61 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 16:54:38 +0000 Subject: [PATCH 01/13] env creation --- hi-ml-azure/Makefile | 5 +++++ hi-ml-azure/environment.yml | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/hi-ml-azure/Makefile b/hi-ml-azure/Makefile index e4172758f..5b8bd221d 100644 --- a/hi-ml-azure/Makefile +++ b/hi-ml-azure/Makefile @@ -86,3 +86,8 @@ test_all: pip_test call_flake8 call_mypy call_pytest_and_coverage example: pip_local echo 'edit src/health/azure/examples/elevate_this.py to reference your compute_cluster_name' cd src/health/azure/examples; python elevate_this.py --azureml --message 'running example from makefile' + +# Create conda environment +env: + conda env remove -n himl-azure + conda env create -f environment.yml diff --git a/hi-ml-azure/environment.yml b/hi-ml-azure/environment.yml index 55a687735..c42e94adb 100644 --- a/hi-ml-azure/environment.yml +++ b/hi-ml-azure/environment.yml @@ -4,3 +4,20 @@ name: himl-azure dependencies: - pip=20.1.1 - python=3.7.3 + - pip: + - azure-ai-ml>=1.1.1 + - azureml-core>=1.42.0 + - azureml-dataset-runtime[fuse]>=1.42.0 + - azureml-mlflow>=1.42.0 + - azure-storage-blob==12.10.0 + - azureml-tensorboard>=1.42.0 + - azureml-train-core>=1.42.0 + - conda-merge>=0.1.5 + - mlflow>=1.29.0 + - pandas>=1.3.4 + - param>=1.12 + - protobuf<4.0 + - pysocks>=1.5.8 + - ruamel.yaml>=0.16.12 + - tensorboard>=2.6.0 + - typing-extensions>=4.3.0 From 0058865237c407386ba09e2ce994d7d49125b2cb Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 16:54:53 +0000 Subject: [PATCH 02/13] launch --- hi-ml-azure/.vscode/launch.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index b0cdf7886..f271799cc 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -4,6 +4,14 @@ // For more information, visit: 
https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false + }, { "name": "Run example script in AzureML", "type": "python", From 6cfae7b1f4a1251b07cf77a530996ecf1ff35817 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 20:56:11 +0000 Subject: [PATCH 03/13] register option --- hi-ml-azure/src/health_azure/datasets.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hi-ml-azure/src/health_azure/datasets.py b/hi-ml-azure/src/health_azure/datasets.py index 4c2edc359..6b55ccc3b 100644 --- a/hi-ml-azure/src/health_azure/datasets.py +++ b/hi-ml-azure/src/health_azure/datasets.py @@ -318,6 +318,7 @@ def __init__( use_mounting: Optional[bool] = None, target_folder: Optional[PathOrString] = None, local_folder: Optional[PathOrString] = None, + register_on_job_completion: bool = True, ): """ :param name: The name of the dataset, as it was registered in the AzureML workspace. For output datasets, @@ -338,6 +339,9 @@ def __init__( :param local_folder: The folder on the local machine at which the dataset is available. This is used only for runs outside of AzureML. If this is empty then the target_folder will be used to mount or download the dataset. + :param register_on_job_completion: Only for output datasets: If this flag is True, the dataset will be + registered in the AML portal after the job has completed and visible in the "Data" section. + If this flag is False, the dataset will be visible for the job, but not in the AML portal "Data" section. """ # This class would be a good candidate for a dataclass, but having an explicit constructor makes # documentation tools in the editor work nicer. 
@@ -354,6 +358,7 @@ def __init__( if str(self.target_folder) == ".": raise ValueError("Can't mount or download a dataset to the current working directory.") self.local_folder = Path(local_folder) if local_folder else None + self.register_on_job_completion = register_on_job_completion def to_input_dataset_local( self, @@ -463,8 +468,8 @@ def to_output_dataset(self, workspace: Workspace, dataset_index: int) -> OutputF dataset = OutputFileDatasetConfig( name=_output_dataset_key(index=dataset_index), destination=(datastore, self.name + "/") ) - # TODO: Can we get tags into here too? - dataset = dataset.register_on_complete(name=self.name) + if self.register_on_job_completion: + dataset = dataset.register_on_complete(name=self.name) if self.target_folder: raise ValueError("Output datasets can't have a target_folder set.") use_mounting = True if self.use_mounting is None else self.use_mounting From 749f120b7eb9b4b136d35713723d17477bc9c774 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 21:00:34 +0000 Subject: [PATCH 04/13] test script --- hi-ml-azure/.vscode/launch.json | 4 +-- hi-ml-azure/testazure/output_datasets.py | 38 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 hi-ml-azure/testazure/output_datasets.py diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index f271799cc..1e2245e98 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -5,10 +5,10 @@ "version": "0.2.0", "configurations": [ { - "name": "Python: Current File", + "name": "Output datasets", "type": "python", "request": "launch", - "program": "${file}", + "program": "${workspaceFolder}/testazure/output_datasets.py", "console": "integratedTerminal", "justMyCode": false }, diff --git a/hi-ml-azure/testazure/output_datasets.py b/hi-ml-azure/testazure/output_datasets.py new file mode 100644 index 000000000..5269ead24 --- /dev/null +++ b/hi-ml-azure/testazure/output_datasets.py @@ 
-0,0 +1,38 @@ +from datetime import datetime +from pathlib import Path +import sys + + +src_root = Path(__file__).parents[1] / "src" +sys.path.append(str(src_root)) + +from health_azure import submit_to_azure_if_needed, DatasetConfig + + +def main(): + # Define the output dataset + output_dataset = DatasetConfig( + name='output_dataset', + datastore='workspaceblobstore', + # path=f"outputs_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}" # Plus a random part + ) + + # Submit the script to Azure if needed + run_info = submit_to_azure_if_needed( + snapshot_root_directory=Path(__file__).parents[1], + output_datasets=[output_dataset], + compute_cluster_name="lite-testing-ds2", + submit_to_azureml=True, + strictly_aml_v1=True, + ) + + output_folder = run_info.output_datasets[0] + print(f"Output folder: {output_folder}") + output_file = output_folder / "output.txt" + output_file.write_text('Hello, world!') + + print("Done!") + + +if __name__ == "__main__": + main() From d5e194f4a7723c3540b140691ad06813cf7013c6 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 21:26:25 +0000 Subject: [PATCH 05/13] fix suffix --- hi-ml-azure/testazure/output_datasets.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hi-ml-azure/testazure/output_datasets.py b/hi-ml-azure/testazure/output_datasets.py index 5269ead24..7770c9791 100644 --- a/hi-ml-azure/testazure/output_datasets.py +++ b/hi-ml-azure/testazure/output_datasets.py @@ -1,6 +1,7 @@ from datetime import datetime from pathlib import Path import sys +import uuid src_root = Path(__file__).parents[1] / "src" @@ -12,9 +13,11 @@ def main(): # Define the output dataset output_dataset = DatasetConfig( - name='output_dataset', + # The dataset name will also be the name of the folder in the datastore + timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S') + suffix = uuid.uuid4().hex[:6], + name=f"joboutputs-{timestamp}-{suffix}", datastore='workspaceblobstore', - # 
path=f"outputs_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}" # Plus a random part ) # Submit the script to Azure if needed From 86f8cea685ffcc8c6e93f4a8e71f4115e3b976de Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 22:11:52 +0000 Subject: [PATCH 06/13] rename --- .../testazure/{output_datasets.py => job_with_output_dataset.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename hi-ml-azure/testazure/{output_datasets.py => job_with_output_dataset.py} (100%) diff --git a/hi-ml-azure/testazure/output_datasets.py b/hi-ml-azure/testazure/job_with_output_dataset.py similarity index 100% rename from hi-ml-azure/testazure/output_datasets.py rename to hi-ml-azure/testazure/job_with_output_dataset.py From c6ecb3ee3401b239e8ff10514b1994f2284250ef Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Tue, 31 Oct 2023 22:12:01 +0000 Subject: [PATCH 07/13] wip --- hi-ml-azure/.vscode/launch.json | 10 ++++- .../testazure/use_run_with_output_dataset.py | 41 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 hi-ml-azure/testazure/use_run_with_output_dataset.py diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index 1e2245e98..d32c40951 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -4,11 +4,19 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false + }, { "name": "Output datasets", "type": "python", "request": "launch", - "program": "${workspaceFolder}/testazure/output_datasets.py", + "program": "${workspaceFolder}/testazure/job_with_output_dataset.py", "console": "integratedTerminal", "justMyCode": false }, diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py 
b/hi-ml-azure/testazure/use_run_with_output_dataset.py new file mode 100644 index 000000000..dc559260f --- /dev/null +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -0,0 +1,41 @@ +from pathlib import Path +import sys + +src_root = Path(__file__).parents[1] / "src" +sys.path.append(str(src_root)) + +from health_azure.utils import get_ml_client, get_workspace + +run_id = "sincere_yacht_xjz95gwvq8" +workspace = get_workspace() +run = workspace.get_run(run_id) + + +ml_client = get_ml_client() +job = ml_client.jobs.get(run_id) +output_dataset = job.outputs["OUTPUT_0"] + +from azure.ai.ml.entities import Data +from azure.ai.ml.constants import AssetTypes +data_type = AssetTypes.URI_FILE + +data = Data(path=output_dataset.path) +# data.mount(ml_client) + +datastore = ml_client.datastores.get("workspaceblobstore") +print(datastore.account_name) +print(datastore.container_name) +account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" +print(f"{output_dataset.path}") +"azureml://subscriptions/a85ceddd-892e-4637-ae4b-67d15ddf5f2b/resourcegroups/health-ml/workspaces/hi-ml/datastores/workspaceblobstore/paths/output_dataset/" + +from azure.storage.blob import BlobServiceClient +from azure.identity import DefaultAzureCredential +blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) +container_client = blob_client.get_container_client(datastore.container_name) + +# List all blobs (files) inside a specific folder (prefix) +paths_parts = output_dataset.path.split("/paths/") +assert len(paths_parts) == 2 +folder_name = paths_parts[1] +blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] From 86b4514a971092e24bf3c07b267c2c309aadd0fc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:25:33 +0000 Subject: [PATCH 08/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci --- hi-ml-azure/testazure/use_run_with_output_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index dc559260f..e19b21bed 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -17,6 +17,7 @@ from azure.ai.ml.entities import Data from azure.ai.ml.constants import AssetTypes + data_type = AssetTypes.URI_FILE data = Data(path=output_dataset.path) @@ -31,6 +32,7 @@ from azure.storage.blob import BlobServiceClient from azure.identity import DefaultAzureCredential + blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) container_client = blob_client.get_container_client(datastore.container_name) From e84ee27ce5281835fefd1ca3896f87ad87e65561 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 04:26:37 -0700 Subject: [PATCH 09/13] cleanup --- hi-ml-azure/testazure/use_run_with_output_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index dc559260f..bacfae81c 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -17,6 +17,7 @@ from azure.ai.ml.entities import Data from azure.ai.ml.constants import AssetTypes + data_type = AssetTypes.URI_FILE data = Data(path=output_dataset.path) @@ -27,10 +28,10 @@ print(datastore.container_name) account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" print(f"{output_dataset.path}") -"azureml://subscriptions/a85ceddd-892e-4637-ae4b-67d15ddf5f2b/resourcegroups/health-ml/workspaces/hi-ml/datastores/workspaceblobstore/paths/output_dataset/" from azure.storage.blob import BlobServiceClient from azure.identity 
import DefaultAzureCredential + blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) container_client = blob_client.get_container_client(datastore.container_name) From a7f9dab310b36e83e245d3f3bfc38ad8b16f71de Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 04:28:22 -0700 Subject: [PATCH 10/13] doc --- hi-ml-azure/src/health_azure/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hi-ml-azure/src/health_azure/datasets.py b/hi-ml-azure/src/health_azure/datasets.py index 6b55ccc3b..a2802e32a 100644 --- a/hi-ml-azure/src/health_azure/datasets.py +++ b/hi-ml-azure/src/health_azure/datasets.py @@ -339,8 +339,8 @@ def __init__( :param local_folder: The folder on the local machine at which the dataset is available. This is used only for runs outside of AzureML. If this is empty then the target_folder will be used to mount or download the dataset. - :param register_on_job_completion: Only for output datasets: If this flag is True, the dataset will be - registered in the AML portal after the job has completed and visible in the "Data" section. + :param register_on_job_completion: Only for output datasets when using AML SDK v1: If this flag is True, the + dataset will be registered in the AML portal after the job has completed and visible in the "Data" section. If this flag is False, the dataset will be visible for the job, but not in the AML portal "Data" section. 
""" # This class would be a good candidate for a dataclass, but having an explicit constructor makes From 029d0fcb99b78fcaa607081e872b424df4c8b3f4 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 09:04:59 -0700 Subject: [PATCH 11/13] on-behalf --- hi-ml-azure/src/health_azure/utils.py | 41 +++++++-- .../testazure/use_run_with_output_dataset.py | 88 +++++++++++++------ 2 files changed, 92 insertions(+), 37 deletions(-) diff --git a/hi-ml-azure/src/health_azure/utils.py b/hi-ml-azure/src/health_azure/utils.py index 859a8f3ea..81e5f397f 100644 --- a/hi-ml-azure/src/health_azure/utils.py +++ b/hi-ml-azure/src/health_azure/utils.py @@ -47,6 +47,7 @@ from azure.ai.ml.entities import Job from azure.ai.ml.entities import Workspace as WorkspaceV2 from azure.ai.ml.entities import Environment as EnvironmentV2 +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential from azure.core.credentials import TokenCredential from azure.core.exceptions import ClientAuthenticationError, ResourceNotFoundError from azure.identity import ( @@ -1902,6 +1903,20 @@ def _get_legitimate_default_credential() -> Optional[TokenCredential]: return cred +def _get_legitimate_azureml_credential() -> Optional[TokenCredential]: + """ + Create a AzureMLOnBehalfOfCredential for interacting with Azure resources and validates it. + + :return: A valid Azure credential. + """ + cred = AzureMLOnBehalfOfCredential() + try: + _validate_credential(cred) + return cred + except Exception: + return None + + def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential]: """ Create an InteractiveBrowser credential for interacting with Azure resources. If the credential can't be @@ -1917,7 +1932,7 @@ def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential return None -def get_credential() -> Optional[TokenCredential]: +def get_credential() -> TokenCredential: """ Get a credential for authenticating with Azure. 
There are multiple ways to retrieve a credential. If environment variables pertaining to details of a Service Principal are available, those will be used @@ -1926,9 +1941,10 @@ device code (which requires the user to visit a link and enter a provided code). If this fails, or if running in Azure, DefaultAzureCredential will be used which iterates through a number of possible authentication methods including identifying an Azure managed identity, cached credentials from VS code, Azure CLI, Powershell etc. - Otherwise returns None. + If none of those works, a ValueError is raised. :return: Any of the aforementioned credentials if available, else None. + :raises ValueError: If no credential can be retrieved. """ service_principal_id = get_secret_from_environment(ENV_SERVICE_PRINCIPAL_ID, allow_missing=True) tenant_id = get_secret_from_environment(ENV_TENANT_ID, allow_missing=True) @@ -1938,17 +1954,24 @@ return _get_legitimate_service_principal_credential(tenant_id, service_principal_id, service_principal_password) try: + # When running in AzureML, this will also try managed identity. cred = _get_legitimate_default_credential() if cred is not None: return cred except ClientAuthenticationError: - cred = _get_legitimate_device_code_credential() - if cred is not None: - return cred - - cred = _get_legitimate_interactive_browser_credential() - if cred is not None: - return cred + if is_running_in_azure_ml(): + # In AzureML, we can try the OnBehalfOf credential + cred = _get_legitimate_azureml_credential() + if cred is not None: + return cred + else: + # Outside of AzureML, try any of the interactive authentication methods + cred = _get_legitimate_device_code_credential() + if cred is not None: + return cred + cred = _get_legitimate_interactive_browser_credential() + if cred is not None: + return cred raise ValueError( "Unable to generate and validate a credential. 
Please see Azure ML documentation" diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index bacfae81c..c8072ced8 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -1,42 +1,74 @@ +import os from pathlib import Path import sys +from azure.ai.ml import MLClient +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential + src_root = Path(__file__).parents[1] / "src" sys.path.append(str(src_root)) -from health_azure.utils import get_ml_client, get_workspace - -run_id = "sincere_yacht_xjz95gwvq8" -workspace = get_workspace() -run = workspace.get_run(run_id) - +from health_azure.himl import submit_to_azure_if_needed +from health_azure.utils import get_ml_client, get_workspace, get_credential +from azure.storage.blob import BlobServiceClient -ml_client = get_ml_client() -job = ml_client.jobs.get(run_id) -output_dataset = job.outputs["OUTPUT_0"] -from azure.ai.ml.entities import Data -from azure.ai.ml.constants import AssetTypes +def main() -> None: + # Check out if we can get the credential + credential = AzureMLOnBehalfOfCredential() + try: + credential.get_token("https://management.azure.com/.default") + except Exception: + print("Failed to get the credential") + uri = os.environ["MLFLOW_TRACKING_URI"] + uri_segments = uri.split("/") + subscription_id = uri_segments[uri_segments.index("subscriptions") + 1] + resource_group_name = uri_segments[uri_segments.index("resourceGroups") + 1] + workspace_name = uri_segments[uri_segments.index("workspaces") + 1] + credential = AzureMLOnBehalfOfCredential() + client = MLClient( + credential=credential, + subscription_id=subscription_id, + resource_group_name=resource_group_name, + workspace_name=workspace_name, + ) + print("Got the client") + run_id = "sincere_yacht_xjz95gwvq8" + workspace = get_workspace() + run = workspace.get_run(run_id) + if hasattr(run, "output_datasets"): + 
print(run.output_datasets) + else: + print("No output datasets") -data_type = AssetTypes.URI_FILE + ml_client = get_ml_client() + job = ml_client.jobs.get(run_id) + output_dataset = job.outputs["OUTPUT_0"] -data = Data(path=output_dataset.path) -# data.mount(ml_client) + datastore = ml_client.datastores.get("workspaceblobstore") + print(datastore.account_name) + print(datastore.container_name) + account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" + print(f"{output_dataset.path}") -datastore = ml_client.datastores.get("workspaceblobstore") -print(datastore.account_name) -print(datastore.container_name) -account_url = f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" -print(f"{output_dataset.path}") + blob_client = BlobServiceClient(account_url=account_url, credential=get_credential()) + container_client = blob_client.get_container_client(datastore.container_name) -from azure.storage.blob import BlobServiceClient -from azure.identity import DefaultAzureCredential + # List all blobs (files) inside a specific folder (prefix) + paths_parts = output_dataset.path.split("/paths/") + assert len(paths_parts) == 2 + folder_name = paths_parts[1] + blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] + print(f"Files in {folder_name}:") + for blob_name in blob_list: + print(blob_name) -blob_client = BlobServiceClient(account_url=account_url, credential=DefaultAzureCredential()) -container_client = blob_client.get_container_client(datastore.container_name) -# List all blobs (files) inside a specific folder (prefix) -paths_parts = output_dataset.path.split("/paths/") -assert len(paths_parts) == 2 -folder_name = paths_parts[1] -blob_list = [blob.name for blob in container_client.list_blobs(name_starts_with=folder_name)] +if __name__ == "__main__": + submit_to_azure_if_needed( + snapshot_root_directory=Path(__file__).parents[2], + compute_cluster_name="ds2-with-id", + 
strictly_aml_v1=True, + submit_to_azureml=True, + ) + main() From f049798383587b802d54b6dfb426c931548f265e Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 09:05:44 -0700 Subject: [PATCH 12/13] wip --- hi-ml-azure/.vscode/launch.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hi-ml-azure/.vscode/launch.json b/hi-ml-azure/.vscode/launch.json index d32c40951..040574f76 100644 --- a/hi-ml-azure/.vscode/launch.json +++ b/hi-ml-azure/.vscode/launch.json @@ -20,6 +20,14 @@ "console": "integratedTerminal", "justMyCode": false }, + { + "name": "Use output datasets", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/testazure/use_run_with_output_dataset.py", + "console": "integratedTerminal", + "justMyCode": false, + }, { "name": "Run example script in AzureML", "type": "python", From 645bb1d3084eb4163e5065f6f4944be4f9245781 Mon Sep 17 00:00:00 2001 From: Anton Schwaighofer Date: Thu, 2 Nov 2023 09:34:32 -0700 Subject: [PATCH 13/13] wip --- hi-ml-azure/src/health_azure/utils.py | 21 +++---------------- .../testazure/use_run_with_output_dataset.py | 11 ++++++---- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/hi-ml-azure/src/health_azure/utils.py b/hi-ml-azure/src/health_azure/utils.py index 81e5f397f..af6a250bc 100644 --- a/hi-ml-azure/src/health_azure/utils.py +++ b/hi-ml-azure/src/health_azure/utils.py @@ -1903,20 +1903,6 @@ def _get_legitimate_default_credential() -> Optional[TokenCredential]: return cred -def _get_legitimate_azureml_credential() -> Optional[TokenCredential]: - """ - Create a AzureMLOnBehalfOfCredential for interacting with Azure resources and validates it. - - :return: A valid Azure credential. 
- """ - cred = AzureMLOnBehalfOfCredential() - try: - _validate_credential(cred) - return cred - except Exception: - return None - - def _get_legitimate_interactive_browser_credential() -> Optional[TokenCredential]: """ Create an InteractiveBrowser credential for interacting with Azure resources. If the credential can't be @@ -1960,10 +1946,9 @@ def get_credential() -> TokenCredential: return cred except ClientAuthenticationError: if is_running_in_azure_ml(): - # In AzureML, we can try the OnBehalfOf credential - cred = _get_legitimate_azureml_credential() - if cred is not None: - return cred + # In AzureML, we can try the AzureMLOnBehalfOfCredential credential. This credential does not need + # to be validated (in fact, it raises errors when we try to validate it by getting a token) + return AzureMLOnBehalfOfCredential() else: # Outside of AzureML, try any of the interactive authentication methods cred = _get_legitimate_device_code_credential() diff --git a/hi-ml-azure/testazure/use_run_with_output_dataset.py b/hi-ml-azure/testazure/use_run_with_output_dataset.py index c8072ced8..7441a3e5f 100644 --- a/hi-ml-azure/testazure/use_run_with_output_dataset.py +++ b/hi-ml-azure/testazure/use_run_with_output_dataset.py @@ -26,13 +26,14 @@ def main() -> None: resource_group_name = uri_segments[uri_segments.index("resourceGroups") + 1] workspace_name = uri_segments[uri_segments.index("workspaces") + 1] credential = AzureMLOnBehalfOfCredential() - client = MLClient( + ml_client = MLClient( credential=credential, subscription_id=subscription_id, resource_group_name=resource_group_name, workspace_name=workspace_name, ) print("Got the client") + run_id = "sincere_yacht_xjz95gwvq8" workspace = get_workspace() run = workspace.get_run(run_id) @@ -41,7 +42,6 @@ def main() -> None: else: print("No output datasets") - ml_client = get_ml_client() job = ml_client.jobs.get(run_id) output_dataset = job.outputs["OUTPUT_0"] @@ -51,7 +51,7 @@ def main() -> None: account_url = 
f"{datastore.protocol}://{datastore.account_name}.blob.{datastore.endpoint}" print(f"{output_dataset.path}") - blob_client = BlobServiceClient(account_url=account_url, credential=get_credential()) + blob_client = BlobServiceClient(account_url=account_url, credential=credential) container_client = blob_client.get_container_client(datastore.container_name) # List all blobs (files) inside a specific folder (prefix) @@ -63,11 +63,14 @@ def main() -> None: for blob_name in blob_list: print(blob_name) + # Get the client without further authentication. + ml_client2 = get_ml_client() + if __name__ == "__main__": submit_to_azure_if_needed( snapshot_root_directory=Path(__file__).parents[2], - compute_cluster_name="ds2-with-id", + compute_cluster_name="lite-testing-ds2", strictly_aml_v1=True, submit_to_azureml=True, )