diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 63698d6a..81363508 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,7 +89,7 @@ jobs: # Test filtering by only mine cloudos job list --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --filter-only-mine --last-n-jobs 10 # Test filtering by queue - #cloudos job list --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --filter-queue "cost_saving_standard_nextflow" --last-n-jobs 10 + cloudos job list --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --filter-queue "cost_saving_standard_nextflow" --last-n-jobs 10 job_details: needs: job_run_and_status runs-on: ubuntu-latest @@ -847,3 +847,41 @@ jobs: CLOUDOS_URL: "https://cloudos.lifebit.ai" run: | cloudos job workdir --delete --yes --cloudos-url $CLOUDOS_URL --apikey $CLOUDOS_TOKEN --workspace-id $CLOUDOS_WORKSPACE_ID --job-id ${{ needs.job_resume.outputs.job_id }} + params_file: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.9" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: setup.py + - name: Install dependencies + run: | + pip install -e . 
+ - name: Run tests + env: + CLOUDOS_TOKEN: ${{ secrets.CLOUDOS_TOKEN_ADAPT }} + CLOUDOS_WORKSPACE_ID: ${{ secrets.CLOUDOS_WORKSPACE_ID_ADAPT }} + CLOUDOS_URL: "https://cloudos.lifebit.ai" + PROJECT_NAME: "cloudos-cli-tests" + WORKFLOW: "GH-rnatoy" + PARAMS_FILE: "Data/rnatoy_params.json" + JOB_NAME_BASE: "cloudos_cli_CI_test_params_file" + COMMIT_HASH: ${{ github.event.pull_request.head.sha || github.sha }} + PR_NUMBER: ${{ github.event.pull_request.number || 'manual' }} + run: | + cloudos job run \ + --cloudos-url $CLOUDOS_URL \ + --apikey $CLOUDOS_TOKEN \ + --workspace-id $CLOUDOS_WORKSPACE_ID \ + --project-name "$PROJECT_NAME" \ + --workflow-name "$WORKFLOW" \ + --job-name "$JOB_NAME_BASE|GitHubCommit:${COMMIT_HASH:0:6}|PR-NUMBER:$PR_NUMBER" \ + --instance-type m4.xlarge \ + --params-file "$PARAMS_FILE" \ + --wait-completion \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index faa444ca..6a590e18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ ## lifebit-ai/cloudos-cli: changelog +## v2.81.1 (2026-02-24) + +### Patch + +- Adds Azure Blob support for `job run --params-file` using `az://<account>.blob.core.windows.net/<container>/<blob>` +- Updates `--params-file` docs/help and parsing validation behavior for Azure payload generation +- Updates `--filter-queue` after API changes + ## v2.81.0 (2026-02-19) ### Feat diff --git a/README.md b/README.md index f01e334a..b0b25ce3 100644 --- a/README.md +++ b/README.md @@ -519,6 +519,16 @@ cloudos job run \ --resumable ``` +Azure Blob example: + +```bash +cloudos job run \ + --profile my_profile \ + --workflow-name rnatoy \ + --params-file az://6480f3db916489d248956a5f.blob.core.windows.net/cloudos-66607e71e8cffa9985592c10/dataset/697b7341c69bacdd8b0b700d/rnatoy_params.json \ + --resumable +``` + Example JSON params file: ```json @@ -542,7 +552,7 @@ annot: > NOTE: options `--job-config`, `--parameter` and `--params-file` are completely compatible and complementary, so you can use a `--job-config` or 
`--params-file` and add additional parameters using `--parameter` in the same call. -> NOTE: when using `--params-file`, the value must be an S3 URI or a File Explorer relative path (e.g., `Data/file.json`). Local file paths are not supported. +> NOTE: when using `--params-file`, the value must be an S3 URI, an Azure Blob URI (`az://<account>.blob.core.windows.net/<container>/<blob>`), or a File Explorer relative path (e.g., `Data/file.json`). Local file paths are not supported. If everything went well, you should see something like: diff --git a/cloudos_cli/_version.py b/cloudos_cli/_version.py index 61237e90..4111b533 100644 --- a/cloudos_cli/_version.py +++ b/cloudos_cli/_version.py @@ -1 +1 @@ -__version__ = '2.81.0' +__version__ = '2.81.1' diff --git a/cloudos_cli/clos.py b/cloudos_cli/clos.py index 13efc51a..99e751ec 100644 --- a/cloudos_cli/clos.py +++ b/cloudos_cli/clos.py @@ -1236,7 +1236,7 @@ def get_job_list(self, workspace_id, last_n_jobs=None, page=None, page_size=None if not queue_id: raise ValueError(f"Queue with name '{filter_queue}' not found in workspace '{workspace_id}'") - all_jobs = [job for job in all_jobs if job.get("batch", {}).get("jobQueue", {}) .get("id") == queue_id] + all_jobs = [job for job in all_jobs if job.get("batch", {}).get("jobQueue", {}) == queue_id] else: raise ValueError(f"The environment is not a batch environment so queues do not exist. 
Please remove the --filter-queue option.") except Exception as e: @@ -1284,7 +1284,7 @@ def process_job_list(r, all_fields=False): 'nextflowVersion', 'batch.enabled', 'storageSizeInGb', - 'batch.jobQueue.id', + 'batch.jobQueue', 'usesFusionFileSystem' ] df_full = pd.json_normalize(r) @@ -1416,7 +1416,7 @@ def calculate_runtime(row): "nextflowVersion": "Nextflow version", "batch.enabled": "Executor", "storageSizeInGb": "Storage size", - "batch.jobQueue.id": "Job queue ID", + "batch.jobQueue": "Job queue ID", "usesFusionFileSystem": "Accelerated file staging" } diff --git a/cloudos_cli/jobs/cli.py b/cloudos_cli/jobs/cli.py index 9e1c3d7e..53bac81b 100644 --- a/cloudos_cli/jobs/cli.py +++ b/cloudos_cli/jobs/cli.py @@ -68,7 +68,8 @@ def job(): help=('A file containing the parameters to pass to the job call. ' + 'It should be a .json or .yaml file with a dictionary structure ' + 'where keys are parameter names and values are parameter values. ' + - 'This expects an S3 URI file path or a File Explorer relative path ' + + 'This expects an S3 URI, Azure Blob URI (az://.blob.core.windows.net//) ' + + 'or a File Explorer relative path ' + '(e.g., Data/params_file.json) and does not work with local files.')) @click.option('-p', '--parameter', diff --git a/cloudos_cli/jobs/job.py b/cloudos_cli/jobs/job.py index 981ae63a..9c28d453 100644 --- a/cloudos_cli/jobs/job.py +++ b/cloudos_cli/jobs/job.py @@ -11,6 +11,7 @@ from pathlib import Path from urllib.parse import urlparse import base64 +import re from cloudos_cli.utils.array_job import classify_pattern, get_file_or_folder_id, extract_project import os import click @@ -183,11 +184,7 @@ def build_parameters_file_payload(self, params_file): if len(params_file) != 1: raise ValueError('Please, provide a single file for --params-file.') params_file = params_file[0] - - ext = os.path.splitext(params_file)[1].lower() allowed_ext = {'.json', '.yaml', '.yml'} - if ext not in allowed_ext: - raise ValueError('Please, provide a .json or 
.yaml file for --params-file.') if params_file.startswith('s3://'): parsed = urlparse(params_file) @@ -196,6 +193,9 @@ def build_parameters_file_payload(self, params_file): if not bucket or not s3_key: raise ValueError('Invalid S3 URL. Please, provide a full s3://bucket/key path.') name = s3_key.rstrip('/').split('/')[-1] + ext = os.path.splitext(name)[1].lower() + if ext not in allowed_ext: + raise ValueError('Please, provide a .json or .yaml file for --params-file.') return { "parametersFile": { "dataItemEmbedded": { @@ -209,9 +209,57 @@ def build_parameters_file_payload(self, params_file): } } + if params_file.startswith('az://'): + parsed = urlparse(params_file) + if parsed.query: + raise ValueError('Azure URL with query parameters is not supported for --params-file.') + + host = parsed.netloc + if not host.endswith('.blob.core.windows.net'): + raise ValueError('Invalid Azure URL. Expected format: az://.blob.core.windows.net//') + + blob_storage_account_name = host[:-len('.blob.core.windows.net')] + path_parts = parsed.path.lstrip('/').split('/', 1) + if len(path_parts) != 2: + raise ValueError('Invalid Azure URL. Expected format: az://.blob.core.windows.net//') + + blob_container_name, blob_name = path_parts + blob_name = blob_name.rstrip('/') + if not blob_storage_account_name or not blob_container_name or not blob_name: + raise ValueError('Invalid Azure URL. 
Expected format: az://.blob.core.windows.net//') + + blob_leaf = blob_name.split('/')[-1] + uuid_suffix_match = re.match(r'^(.*)_([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})$', blob_leaf) + if uuid_suffix_match: + name = uuid_suffix_match.group(1) + else: + name = blob_leaf + + ext = os.path.splitext(name)[1].lower() + if ext not in allowed_ext: + raise ValueError('Please, provide a .json or .yaml file for --params-file.') + + return { + "parametersFile": { + "dataItemEmbedded": { + "data": { + "name": name, + "blobStorageAccountName": blob_storage_account_name, + "blobContainerName": blob_container_name, + "blobName": blob_name + }, + "type": "AzureBlobFile" + } + } + } + if not self.project_name: raise ValueError('Please, provide --project-name to resolve --params-file paths.') + ext = os.path.splitext(params_file)[1].lower() + if ext not in allowed_ext: + raise ValueError('Please, provide a .json or .yaml file for --params-file.') + normalized_path = params_file.lstrip('/') allowed_prefixes = ('Data', 'Analyses Results', 'Cohorts') if not normalized_path.startswith(allowed_prefixes): diff --git a/tests/test_clos/test_get_job_list_filtering.py b/tests/test_clos/test_get_job_list_filtering.py index 044f9bf4..a29167f0 100644 --- a/tests/test_clos/test_get_job_list_filtering.py +++ b/tests/test_clos/test_get_job_list_filtering.py @@ -23,7 +23,7 @@ "user": {"id": USER_ID, "name": "Test User"}, "project": {"id": PROJECT_ID, "name": "test-project"}, "workflow": {"id": WORKFLOW_ID, "name": "test-workflow"}, - "batch": {"jobQueue": {"id": QUEUE_ID}} + "batch": {"jobQueue": QUEUE_ID} }, { "_id": "job2", @@ -32,7 +32,7 @@ "user": {"id": "other_user_id", "name": "Other User"}, "project": {"id": "other_project_id", "name": "other-project"}, "workflow": {"id": "other_workflow_id", "name": "other-workflow"}, - "batch": {"jobQueue": {"id": "other_queue_id"}} + "batch": {"jobQueue": "other_queue_id"} } ] } diff --git 
a/tests/test_clos/test_process_job_list.py b/tests/test_clos/test_process_job_list.py index 5066364d..af8f2ced 100644 --- a/tests/test_clos/test_process_job_list.py +++ b/tests/test_clos/test_process_job_list.py @@ -27,12 +27,57 @@ def mocked_requests_get(): def test_process_job_list_output_correct_shape(mocked_requests_get,): df = Cloudos.process_job_list(mocked_requests_get, all_fields=False) - assert df.shape == output_df.shape + selected_columns = ['status', + 'name', + 'project.name', + 'user.name', + 'user.surname', + 'workflow.name', + '_id', + 'startTime', + 'endTime', + 'createdAt', + 'updatedAt', + 'revision.commit', + 'realInstancesExecutionCost', + 'masterInstance.usedInstance.type', + 'storageMode', + 'workflow.repository.url', + 'nextflowVersion', + 'batch.enabled', + 'storageSizeInGb', + 'batch.jobQueue', + 'usesFusionFileSystem'] + df_full = Cloudos.process_job_list(mocked_requests_get, all_fields=True) + expected_headers = [col for col in selected_columns if col in df_full.columns] + assert df.shape == (len(mocked_requests_get), len(expected_headers)) def test_process_job_list_output_correct_headers(mocked_requests_get): df = Cloudos.process_job_list(mocked_requests_get, all_fields=False) - correct_headers = list(output_df.columns) + selected_columns = ['status', + 'name', + 'project.name', + 'user.name', + 'user.surname', + 'workflow.name', + '_id', + 'startTime', + 'endTime', + 'createdAt', + 'updatedAt', + 'revision.commit', + 'realInstancesExecutionCost', + 'masterInstance.usedInstance.type', + 'storageMode', + 'workflow.repository.url', + 'nextflowVersion', + 'batch.enabled', + 'storageSizeInGb', + 'batch.jobQueue', + 'usesFusionFileSystem'] + df_full = Cloudos.process_job_list(mocked_requests_get, all_fields=True) + correct_headers = [col for col in selected_columns if col in df_full.columns] actual_headers = list(df.columns) assert correct_headers == actual_headers diff --git a/tests/test_jobs/test_convert_nextflow_to_json.py 
b/tests/test_jobs/test_convert_nextflow_to_json.py index dfd8c7c2..29d95d78 100644 --- a/tests/test_jobs/test_convert_nextflow_to_json.py +++ b/tests/test_jobs/test_convert_nextflow_to_json.py @@ -186,6 +186,57 @@ def test_params_file_payload_s3(): } +def test_params_file_payload_azure_blob(): + job = Job( + "https://cloudos.example", + "test_api_key", + None, + "workspace_id", + "project", + "workflow", + project_id=param_dict["project_id"], + workflow_id=param_dict["workflow_id"] + ) + payload = job.build_parameters_file_payload( + "az://6480f3db916489d248956a5f.blob.core.windows.net/" + "cloudos-66607e71e8cffa9985592c10/dataset/697b7341c69bacdd8b0b700d/" + "rnatoy_params.json_137531fe-c19a-44c6-9e30-2d6dcb371072" + ) + assert payload == { + "parametersFile": { + "dataItemEmbedded": { + "data": { + "name": "rnatoy_params.json", + "blobStorageAccountName": "6480f3db916489d248956a5f", + "blobContainerName": "cloudos-66607e71e8cffa9985592c10", + "blobName": "dataset/697b7341c69bacdd8b0b700d/rnatoy_params.json_137531fe-c19a-44c6-9e30-2d6dcb371072" + }, + "type": "AzureBlobFile" + } + } + } + + +def test_params_file_payload_azure_blob_with_query_not_supported(): + job = Job( + "https://cloudos.example", + "test_api_key", + None, + "workspace_id", + "project", + "workflow", + project_id=param_dict["project_id"], + workflow_id=param_dict["workflow_id"] + ) + with pytest.raises(ValueError) as excinfo: + job.build_parameters_file_payload( + "az://6480f3db916489d248956a5f.blob.core.windows.net/" + "cloudos-66607e71e8cffa9985592c10/dataset/697b7341c69bacdd8b0b700d/" + "rnatoy_params.json_137531fe-c19a-44c6-9e30-2d6dcb371072?sv=token" + ) + assert "query parameters is not supported" in str(excinfo.value) + + def test_params_file_payload_file_explorer(monkeypatch): job = Job( "https://cloudos.example",