From f4516158c295b4f38696266c960d7e5ec738c9d1 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Fri, 7 Feb 2025 14:26:56 -0500 Subject: [PATCH 01/23] :sparkles: Mutation to upsert global IDs --- .../dewrangle/graphql/study/mutations.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/d3b_api_client_cli/dewrangle/graphql/study/mutations.py b/d3b_api_client_cli/dewrangle/graphql/study/mutations.py index 51c7cf2..a30888b 100644 --- a/d3b_api_client_cli/dewrangle/graphql/study/mutations.py +++ b/d3b_api_client_cli/dewrangle/graphql/study/mutations.py @@ -66,3 +66,46 @@ } """ ) + +upsert_global_descriptors = gql( + """ + mutation globalDescriptorUpsertMutation( + $input: GlobalDescriptorUpsertInput! + ) { + globalDescriptorUpsert(input: $input) { + errors { + ... on MutationError { + __typename + message + field + } + } + job { + id + completedAt + globalDescriptors { + totalCount + edges { + node { + descriptor + globalId + fhirResourceType + } + } + } + errors { + totalCount + edges { + node { + name + message + isFatal + } + } + } + } + } + } + """ +) + From 1117cb4c10bc02f333463fd717fa65b18e356e09 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Fri, 7 Feb 2025 14:27:50 -0500 Subject: [PATCH 02/23] :recycle: Add upsert_global_descriptors to study API --- .../dewrangle/graphql/study/__init__.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/d3b_api_client_cli/dewrangle/graphql/study/__init__.py b/d3b_api_client_cli/dewrangle/graphql/study/__init__.py index 2d1a6da..fba6a64 100644 --- a/d3b_api_client_cli/dewrangle/graphql/study/__init__.py +++ b/d3b_api_client_cli/dewrangle/graphql/study/__init__.py @@ -5,6 +5,7 @@ import os import logging from pprint import pformat +from typing import Optional import gql @@ -31,6 +32,30 @@ DEWRANGLE_MAX_PAGE_SIZE = config["dewrangle"]["pagination"]["max_page_size"] +def upsert_global_descriptors( + study_file_id: str, + skip_unavailable_descriptors: Optional[bool] = True +) -> dict: + 
""" + Trigger the operation to upsert global descriptors in Dewrangle + + Args: + - skip_unavailable_descriptors: If true any errors due to a descriptor + """ + logger.info( + "🛸 Upsert global descriptors for study file: %s", study_file_id + ) + variables = { + "input": { + "studyFileId": study_file_id, + "skipUnavailableDescriptors": skip_unavailable_descriptors, + } + } + resp = exec_query(mutations.upsert_global_descriptors, variables=variables) + + return resp + + def upsert_study( variables: dict, organization_id: str, study_id: str = None ) -> dict: From 11a822880050ac9489a413f1f77949aa0756a692 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Fri, 7 Feb 2025 14:28:24 -0500 Subject: [PATCH 03/23] :sparkles: REST client methods to upload study file, upsert global IDs --- d3b_api_client_cli/dewrangle/rest/files.py | 142 +++++++++++++++++---- 1 file changed, 119 insertions(+), 23 deletions(-) diff --git a/d3b_api_client_cli/dewrangle/rest/files.py b/d3b_api_client_cli/dewrangle/rest/files.py index 977e069..6ca7463 100644 --- a/d3b_api_client_cli/dewrangle/rest/files.py +++ b/d3b_api_client_cli/dewrangle/rest/files.py @@ -3,10 +3,13 @@ """ from typing import Optional +from pprint import pformat import logging import os import cgi +from d3b_api_client_cli.dewrangle.graphql import study as study_api + from d3b_api_client_cli.config import ( DEWRANGLE_DEV_PAT, config, @@ -30,13 +33,124 @@ def _filename_from_headers(headers: dict) -> str: return params.get("filename") +def upsert_global_ids( + study_global_id: Optional[str], + dewrangle_study_id: Optional[str], + filepath: str, + skip_unavailable_descriptors: Optional[bool] = True, +): + """ + Upsert global IDs to Dewrangle + + This happens in two steps: + 1. Upload the global descriptor csv file to the study file endpoint + 2. 
Invoke the graphQL mutation to upsert global descriptors + + Args: + - skip_unavailable_descriptors (bool): If true any errors due to a + descriptor already having a global ID assigned will be ignored + + Raise: + ValueError if the study does not exist in Dewrangle + """ + if dewrangle_study_id: + study = study_api.read_study(dewrangle_study_id) + else: + study = study_api.find_study(study_global_id) + + if not study: + raise ValueError( + f"❌ Study " + f"{study_global_id if study_global_id else dewrangle_study_id}" + " does not exist in Dewrangle. Aborting upsert_global_ids" + ) + + study_global_id = study["globalId"] + dewrangle_study_id = study["id"] + + logger.info( + "🛸 Upsert global IDs to Dewrangle for study %s" + ) + + filepath = os.path.abspath(filepath) + base_url = config["dewrangle"]["base_url"] + endpoint_template = config["dewrangle"]["endpoints"]["rest"]["study_file"] + endpoint = endpoint_template.format( + dewrangle_study_id=dewrangle_study_id, + filename=os.path.split(filepath)[-1], + ) + + url = f"{base_url}/{endpoint}" + logger.info("🛸 POST global IDs file %s to Dewrangle %s", filepath, url) + + result = upload_study_file(dewrangle_study_id, filepath) + study_file_id = result["id"] + + # Trigger global descriptor upsert mutation + resp = study_api.upsert_global_descriptors( + study_file_id, + skip_unavailable_descriptors=skip_unavailable_descriptors + ) + result = resp["globalDescriptorUpsert"] + job_id = result["job"]["id"] + + logger.info( + "✅ Completed request to upsert global IDs. 
Job ID: %s", job_id) + + return result + + +def upload_study_file(dewrangle_study_id: str, filepath: str): + """ + Upload a CSV file to Dewrangle's study file endpoint + """ + filepath = os.path.abspath(filepath) + base_url = config["dewrangle"]["base_url"] + endpoint_template = config["dewrangle"]["endpoints"]["rest"]["study_file"] + endpoint = endpoint_template.format( + dewrangle_study_id=dewrangle_study_id, + filename=os.path.split(filepath)[-1], + ) + url = f"{base_url}/{endpoint}" + + return upload_file(url) + + +def upload_file( + url: str, + filepath: Optional[str] = None, + params: Optional[dict] = None +): + """ + Upload a file to Dewrangle + """ + logger.info("🛸 Starting upload of %s to %s", filepath, url) + with open(filepath, "rb") as jsonfile: + headers = {"x-api-key": DEWRANGLE_DEV_PAT} + resp = send_request( + "post", + url, + headers=headers, + data=jsonfile, + params=params, + # Set timeout to infinity so that uploads don't timeout + timeout=-1, + ) + + logger.info("✅ Completed upload: %s", os.path.split(filepath)[-1]) + logger.info(pformat(resp.json())) + + return resp.json() + + def download_file( url: str, output_dir: Optional[str] = None, filepath: Optional[str] = None, + params: Optional[dict] = None ) -> str: """ - Download study's global IDs from Dewrangle + Download a file from Dewrangle If filepath is provided, download content to that filepath @@ -47,12 +161,14 @@ def download_file( filepath - if the downloaded file was not empty None - if the downloaded file was empty """ - logger.info("🛸 Start downloading file from Dewrangle ...") + logger.info("🛸 Start downloading file from Dewrangle %s ...", url) - headers = {"x-api-key": DEWRANGLE_DEV_PAT, "content-type": CSV_CONTENT_TYPE} + headers = {"x-api-key": DEWRANGLE_DEV_PAT, + "content-type": CSV_CONTENT_TYPE} resp = send_request( "get", url, + params=params, headers=headers, ) if not filepath: @@ -88,23 +204,3 @@ def download_job_errors( url = f"{DEWRANGLE_BASE_URL}{endpoint}" return 
download_file(url, filepath=filepath, output_dir=output_dir) - - -def download_hash_report( - job_id: str, - output_dir: Optional[str] = None, - filepath: Optional[str] = None, -) -> str: - """ - Download a volume hash report from Dewrangle - - See download_file for details - """ - # Ensure env vars are set - check_dewrangle_http_config() - - endpoint_template = config["dewrangle"]["endpoints"]["rest"]["hash_report"] - endpoint = endpoint_template.format(job_id=job_id) - url = f"{DEWRANGLE_BASE_URL}{endpoint}" - - return download_file(url, filepath=filepath, output_dir=output_dir) From 9638996bb617894915e1c3d2d8e24f1df9afd660 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 11 Feb 2025 09:12:48 -0500 Subject: [PATCH 04/23] :fire: Rm unit test for download hash report --- tests/unit/dewrangle/test_download.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/tests/unit/dewrangle/test_download.py b/tests/unit/dewrangle/test_download.py index d01c07d..6005a28 100644 --- a/tests/unit/dewrangle/test_download.py +++ b/tests/unit/dewrangle/test_download.py @@ -1,5 +1,5 @@ """ -Test downloading volume hash files (error, hash report) from Dewrangle +Test downloading volume hash files (job errors) from Dewrangle """ import os @@ -84,27 +84,8 @@ def test_download_job_errors(mocker): ) -def test_download_hash_report(mocker): - """ - Test download Dewrangle volume hash report - """ - mock_download_file = mocker.patch( - "d3b_api_client_cli.dewrangle.rest.files.download_file" - ) - - files.download_hash_report("job-id", output_dir="output") - - endpoint_template = config["dewrangle"]["endpoints"]["rest"]["hash_report"] - endpoint = endpoint_template.format(job_id="job-id") - url = f"{DEWRANGLE_BASE_URL.rstrip('/')}/{endpoint.lstrip('/')}" - - mock_download_file.assert_called_with( - url, output_dir="output", filepath=None - ) - - @pytest.mark.parametrize( - "download_method", [files.download_job_errors, files.download_hash_report] 
+ "download_method", [files.download_job_errors] ) @pytest.mark.parametrize( "token,url, expected_msg", From 9ef44abb15544e55cf3e1660fe0799763eb8c165 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 11 Feb 2025 09:43:10 -0500 Subject: [PATCH 05/23] :truck: Mv global ID methods to own module --- d3b_api_client_cli/dewrangle/global_id.py | 96 ++++++++++++++++++++ d3b_api_client_cli/dewrangle/rest/files.py | 101 ++++----------------- 2 files changed, 114 insertions(+), 83 deletions(-) create mode 100644 d3b_api_client_cli/dewrangle/global_id.py diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py new file mode 100644 index 0000000..a4f82b9 --- /dev/null +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -0,0 +1,96 @@ +""" +Dewrangle functions to create, update, remove global descriptors in Dewrangle +""" + +from typing import Optional +import logging +import os + +from d3b_api_client_cli.dewrangle.graphql import study as study_api + +from d3b_api_client_cli.config import ( + config, +) +from d3b_api_client_cli.dewrangle.rest import upload_study_file +from d3b_api_client_cli.utils import timestamp + +logger = logging.getLogger(__name__) + +CSV_CONTENT_TYPE = "text/csv" +DEWRANGLE_BASE_URL = config["dewrangle"]["base_url"].rstrip("/") +DEFAULT_FILENAME = f"dewrangle-file-{timestamp()}.csv" + + +def upsert_global_descriptors( + filepath: str, + study_global_id: Optional[str], + dewrangle_study_id: Optional[str], + skip_unavailable_descriptors: Optional[bool] = True, +): + """ + Upsert global IDs to Dewrangle + + This happens in two steps: + 1. Upload the global descriptor csv file to the study file endpoint + 2. Invoke the graphQL mutation to upsert global descriptors + + Args: + - skip_unavailable_descriptors (bool): If true any errors due to a + descriptor already having a global ID assigned will be ignored + + Options: + - study_global_id - Provide this when you don't know the study's + GraphQL ID in Dewrangle. 
+ - study_id - Study GraphQL ID in Dewrangle + + You must provide either the study_global_id OR the study_id but not both + + Raise: + ValueError if the study does not exist in Dewrangle + """ + if dewrangle_study_id: + study = study_api.read_study(dewrangle_study_id) + else: + study = study_api.find_study(study_global_id) + + if not study: + raise ValueError( + f"❌ Study " + f"{study_global_id if study_global_id else dewrangle_study_id}" + " does not exist in Dewrangle. Aborting" + ) + + study_global_id = study["globalId"] + dewrangle_study_id = study["id"] + + logger.info( + "🛸 Upsert global IDs in %s to Dewrangle for study %s", + filepath, study_global_id + ) + + filepath = os.path.abspath(filepath) + base_url = config["dewrangle"]["base_url"] + endpoint_template = config["dewrangle"]["endpoints"]["rest"]["study_file"] + endpoint = endpoint_template.format( + dewrangle_study_id=dewrangle_study_id, + filename=os.path.split(filepath)[-1], + ) + + url = f"{base_url}/{endpoint}" + logger.info("🛸 POST global IDs file %s to Dewrangle %s", filepath, url) + + result = upload_study_file(dewrangle_study_id, filepath) + study_file_id = result["id"] + + # Trigger global descriptor upsert mutation + resp = study_api.upsert_global_descriptors( + study_file_id, + skip_unavailable_descriptors=skip_unavailable_descriptors + ) + result = resp["globalDescriptorUpsert"] + job_id = result["job"]["id"] + + logger.info( + "✅ Completed request to upsert global IDs. 
Job ID: %s", job_id) + + return result diff --git a/d3b_api_client_cli/dewrangle/rest/files.py b/d3b_api_client_cli/dewrangle/rest/files.py index 6ca7463..5fa8ae8 100644 --- a/d3b_api_client_cli/dewrangle/rest/files.py +++ b/d3b_api_client_cli/dewrangle/rest/files.py @@ -33,89 +33,6 @@ def _filename_from_headers(headers: dict) -> str: return params.get("filename") -def upsert_global_ids( - study_global_id: Optional[str], - dewrangle_study_id: Optional[str], - filepath: str, - skip_unavailable_descriptors: Optional[bool] = True, -): - """ - Upsert global IDs to Dewrangle - - This happens in two steps: - 1. Upload the global descriptor csv file to the study file endpoint - 2. Invoke the graphQL mutation to upsert global descriptors - - Args: - - skip_unavailable_descriptors (bool): If true any errors due to a - descriptor already having a global ID assigned will be ignored - - Raise: - ValueError if the study does not exist in Dewrangle - """ - if dewrangle_study_id: - study = study_api.read_study(dewrangle_study_id) - else: - study = study_api.find_study(study_global_id) - - if not study: - raise ValueError( - f"❌ Study " - f"{study_global_id if study_global_id else dewrangle_study_id}" - " does not exist in Dewrangle. 
Aborting upsert_global_ids" - ) - - study_global_id = study["globalId"] - dewrangle_study_id = study["id"] - - logger.info( - "🛸 Upsert global IDs to Dewrangle for study %s" - ) - - filepath = os.path.abspath(filepath) - base_url = config["dewrangle"]["base_url"] - endpoint_template = config["dewrangle"]["endpoints"]["rest"]["study_file"] - endpoint = endpoint_template.format( - dewrangle_study_id=dewrangle_study_id, - filename=os.path.split(filepath)[-1], - ) - - url = f"{base_url}/{endpoint}" - logger.info("🛸 POST global IDs file %s to Dewrangle %s", filepath, url) - - result = upload_study_file(dewrangle_study_id, filepath) - study_file_id = result["id"] - - # Trigger global descriptor upsert mutation - resp = study_api.upsert_global_descriptors( - study_file_id, - skip_unavailable_descriptors=skip_unavailable_descriptors - ) - result = resp["globalDescriptorUpsert"] - job_id = result["job"]["id"] - - logger.info( - "✅ Completed request to upsert global IDs. Job ID: %s", job_id) - - return result - - -def upload_study_file(dewrangle_study_id: str, filepath: str): - """ - Upload a CSV file to Dewrangle's study file endpoint - """ - filepath = os.path.abspath(filepath) - base_url = config["dewrangle"]["base_url"] - endpoint_template = config["dewrangle"]["endpoints"]["rest"]["study_file"] - endpoint = endpoint_template.format( - dewrangle_study_id=dewrangle_study_id, - filename=os.path.split(filepath)[-1], - ) - url = f"{base_url}/{endpoint}" - - return upload_file(url) - - def upload_file( url: str, filepath: Optional[str] = None, @@ -186,6 +103,24 @@ def download_file( return filepath +def upload_study_file(dewrangle_study_id: str, filepath: str): + """ + Upload a CSV file to Dewrangle's study file endpoint + """ + filepath = os.path.abspath(filepath) + base_url = config["dewrangle"]["base_url"] + endpoint_template = config["dewrangle"]["endpoints"]["rest"]["study_file"] + endpoint = endpoint_template.format( + dewrangle_study_id=dewrangle_study_id, + 
filename=os.path.split(filepath)[-1], + ) + url = f"{base_url}/{endpoint}" + + return upload_file(url, filepath=filepath) + + + + def download_job_errors( job_id: str, output_dir: Optional[str] = None, From 2cb178cb2ff91b9d8304b22946d989317a264c52 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 11 Feb 2025 09:43:39 -0500 Subject: [PATCH 06/23] :sparkles: Add upsert global desc CLI command --- .../cli/dewrangle/global_id_commands.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 d3b_api_client_cli/cli/dewrangle/global_id_commands.py diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py new file mode 100644 index 0000000..9953583 --- /dev/null +++ b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -0,0 +1,56 @@ +""" +All CLI commands related to creating, updating, and downloading global IDs +in Dewrangle +""" + +import logging +import click + +from d3b_api_client_cli.config import log +from d3b_api_client_cli.dewrangle.global_id import ( + upsert_global_descriptors as _upsert_global_descriptors, +) + +logger = logging.getLogger(__name__) + + +@click.command() +@click.option( + "--study-global-id", + help="The global ID of the study in Dewrangle. You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.option( + "--study-id", + help="The GraphQL ID of the study in Dewrangle. You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.argument( + "filepath", + type=click.Path(exists=False, file_okay=True, dir_okay=False), +) +def upsert_global_descriptors(filepath, study_id, study_global_id): + """ + Upsert global IDs in Dewrangle for a study. 
+ + In order to create new global IDs provide a CSV file with the columns: + descriptor, fhirResourceType + + In order to update existing global IDs provide a CSV file with the columns: + descriptor, fhirResourceType, globalId + + \b + Arguments: + \b + filepath - Path to the file with global IDs and descriptors + """ + + log.init_logger() + + if (not study_id) and (not study_global_id): + raise click.BadParameter( + "❌ You must provide either the study's global ID in Dewrangle OR " + "the study's GraphQL ID in Dewrangle" + ) + + return _upsert_global_descriptors(filepath, study_global_id, study_id) From e65437b8dc1444ead5068830c2d1b0de2a97baf3 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 11 Feb 2025 10:21:42 -0500 Subject: [PATCH 07/23] :sparkles: Method to download global ID descriptors --- d3b_api_client_cli/dewrangle/global_id.py | 2 + d3b_api_client_cli/dewrangle/rest/files.py | 84 ++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index a4f82b9..4ec7ff9 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -89,6 +89,8 @@ def upsert_global_descriptors( ) result = resp["globalDescriptorUpsert"] job_id = result["job"]["id"] + result["study_global_id"] = study_global_id + result["study_id"] = study["id"] logger.info( "✅ Completed request to upsert global IDs. 
Job ID: %s", job_id) diff --git a/d3b_api_client_cli/dewrangle/rest/files.py b/d3b_api_client_cli/dewrangle/rest/files.py index 5fa8ae8..0866acd 100644 --- a/d3b_api_client_cli/dewrangle/rest/files.py +++ b/d3b_api_client_cli/dewrangle/rest/files.py @@ -3,6 +3,7 @@ """ from typing import Optional +from enum import Enum from pprint import pformat import logging import os @@ -14,6 +15,7 @@ DEWRANGLE_DEV_PAT, config, check_dewrangle_http_config, + ROOT_DATA_DIR ) from d3b_api_client_cli.utils import send_request, timestamp @@ -24,6 +26,14 @@ DEFAULT_FILENAME = f"dewrangle-file-{timestamp()}.csv" +class GlobalIdDescriptorOptions(Enum): + """ + Used in download_global_descriptors + """ + DOWNLOAD_ALL_DESC = "all" + DOWNLOAD_MOST_RECENT = "most-recent" + + def _filename_from_headers(headers: dict) -> str: """ Helper to get the filename from the Content-Disposition @@ -80,6 +90,10 @@ def download_file( """ logger.info("🛸 Start downloading file from Dewrangle %s ...", url) + if (not filepath) and (not output_dir): + output_dir = os.path.join(ROOT_DATA_DIR) + os.makedirs(output_dir, exist_ok=True) + headers = {"x-api-key": DEWRANGLE_DEV_PAT, "content-type": CSV_CONTENT_TYPE} resp = send_request( @@ -119,6 +133,76 @@ def upload_study_file(dewrangle_study_id: str, filepath: str): return upload_file(url, filepath=filepath) +def download_global_descriptors( + dewrangle_study_id: str, + job_id: Optional[str] = None, + descriptors: Optional[GlobalIdDescriptorOptions] = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value, # noqa + filepath: Optional[str] = None, + output_dir: Optional[str] = None, +) -> str: + """ + Download study's global IDs from Dewrangle + + Args: + - dewrangle_study_id: GraphQL ID of study in Dewrangle + - filepath: GraphQL ID of study in Dewrangle + Options: + - job_id: The job ID returned from the upsert_global_descriptors + method. If this is provided, only global IDs from that + job will be returned. 
+ + - descriptors: A query parameter that determines how many descriptors + will be returned for the global ID. + + If set to "all" return all descriptors associated + with the global ID + + If set to "most-recent" return the most recent + descriptor associated with the global ID + + - filepath: If filepath is provided, download content to that filepath + + - output_dir: If output_dir is provided, get filename from + Content-Disposition header and download the file to the + output directory with that filename + """ + study = study_api.read_study(dewrangle_study_id) + + if not study: + raise ValueError( + f"❌ Study {dewrangle_study_id}" + " does not exist in Dewrangle. Aborting" + ) + + study_global_id = study["globalId"] + + logger.info( + "🛸 Start downloading global IDs for study %s from Dewrangle ...", + study_global_id + ) + + filepath = os.path.abspath(filepath) if filepath else None + base_url = config["dewrangle"]["base_url"] + endpoint_template = config["dewrangle"]["endpoints"]["rest"]["global_id"] + endpoint = endpoint_template.format(dewrangle_study_id=dewrangle_study_id) + url = f"{base_url}/{endpoint}" + + params = {} + if job_id: + params.update({"job": job_id}) + if descriptors: + params.update({"descriptors": descriptors}) + + filepath = download_file( + url, + output_dir=output_dir, + filepath=filepath, + params=params + ) + + logger.info("✅ Completed download of global IDs: %s", filepath) + + return filepath def download_job_errors( From cee9287a4f8cd807989b4474bf66a2dc62716f07 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 11 Feb 2025 12:22:09 -0500 Subject: [PATCH 08/23] :sparkles: Method and cmd to upsert and download the resulting descriptors --- .../cli/dewrangle/global_id_commands.py | 140 +++++++++++++++++- d3b_api_client_cli/dewrangle/global_id.py | 62 +++++++- 2 files changed, 198 insertions(+), 4 deletions(-) diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py index 
9953583..150f99d 100644 --- a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py +++ b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -3,17 +3,95 @@ in Dewrangle """ +import os import logging import click from d3b_api_client_cli.config import log +from d3b_api_client_cli.dewrangle.rest.files import GlobalIdDescriptorOptions from d3b_api_client_cli.dewrangle.global_id import ( upsert_global_descriptors as _upsert_global_descriptors, + download_global_descriptors as _download_global_descriptors, + upsert_and_download_global_descriptors as _upsert_and_download_global_descriptors, ) logger = logging.getLogger(__name__) +@click.command() +@click.option( + "--output-filepath", + type=click.Path(exists=False, file_okay=True, dir_okay=False), + help="If provided, download the file to this path. This takes " + "precedence over the --output-dir option" +) +@click.option( + "--output-dir", + default=os.getcwd(), + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="If provided, download the file with the default file name into " + "this directory" +) +@click.option( + "--descriptors", + type=click.Choice( + item.value for item in GlobalIdDescriptorOptions + ), + help="Which descriptor(s) for each global ID to download. Either download" + " all descriptors for each global ID or just the most recent" +) +@click.option( + "--study-global-id", + help="The global ID of the study in Dewrangle. You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.option( + "--study-id", + help="The GraphQL ID of the study in Dewrangle. 
You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.argument( + "input_filepath", + type=click.Path(exists=False, file_okay=True, dir_okay=False), +) +def upsert_and_download_global_descriptors( + input_filepath, study_id, study_global_id, descriptors, output_dir, + output_filepath +): + """ + Send request to upsert global ID descriptors in Dewrangle and + download the resulting global ID descriptors. + + In order to create new global IDs provide a CSV file with the columns: + descriptor, fhirResourceType + + In order to update existing global IDs provide a CSV file with the columns: + descriptor, fhirResourceType, globalId + + \b + Arguments: + \b + input_filepath - Path to the file with global IDs and descriptors + """ + + log.init_logger() + + if (not study_id) and (not study_global_id): + raise click.BadParameter( + "❌ You must provide either the study's global ID in Dewrangle OR " + "the study's GraphQL ID in Dewrangle" + ) + + return _upsert_and_download_global_descriptors( + input_filepath, + study_global_id=study_global_id, + dewrangle_study_id=study_id, + descriptors=descriptors, + output_dir=output_dir, + output_filepath=output_filepath, + ) + + @click.command() @click.option( "--study-global-id", @@ -31,7 +109,7 @@ ) def upsert_global_descriptors(filepath, study_id, study_global_id): """ - Upsert global IDs in Dewrangle for a study. + Upsert global ID descriptors in Dewrangle for a study. 
In order to create new global IDs provide a CSV file with the columns: descriptor, fhirResourceType @@ -54,3 +132,63 @@ def upsert_global_descriptors(filepath, study_id, study_global_id): ) return _upsert_global_descriptors(filepath, study_global_id, study_id) + + +@click.command() +@click.option( + "--output-dir", + default=os.getcwd(), + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="If provided, download the file with the default file name into " + "this directory" +) +@click.option( + "--descriptors", + type=click.Choice( + item.value for item in GlobalIdDescriptorOptions + ), + help="Which descriptor(s) for each global ID to download. Either download" + " all descriptors for each global ID or just the most recent" +) +@click.option( + "--job-id", + help="Dewrangle job id from the upsert_global_descriptors cmd" +) +@click.option( + "--study-global-id", + help="The global ID of the study in Dewrangle. You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.option( + "--study-id", + help="The GraphQL ID of the study in Dewrangle. You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.option( + "--filepath", + type=click.Path(exists=False, file_okay=True, dir_okay=False), + help="If provided, download the file to this filepath. This takes " + "precedence over --output-dir" +) +def download_global_descriptors( + filepath, study_id, study_global_id, job_id, descriptors, output_dir +): + """ + Download global ID descriptors in Dewrangle for a study. 
+ """ + + log.init_logger() + + if (not study_id) and (not study_global_id): + raise click.BadParameter( + "❌ You must provide either the study's global ID in Dewrangle OR " + "the study's GraphQL ID in Dewrangle" + ) + + return _download_global_descriptors( + study_id, + filepath=filepath, + job_id=job_id, + descriptors=descriptors, + output_dir=output_dir, + ) diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index 4ec7ff9..54ec4a1 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -10,8 +10,13 @@ from d3b_api_client_cli.config import ( config, + ROOT_DATA_DIR +) +from d3b_api_client_cli.dewrangle.rest import ( + upload_study_file, + download_global_descriptors, + GlobalIdDescriptorOptions ) -from d3b_api_client_cli.dewrangle.rest import upload_study_file from d3b_api_client_cli.utils import timestamp logger = logging.getLogger(__name__) @@ -21,6 +26,55 @@ DEFAULT_FILENAME = f"dewrangle-file-{timestamp()}.csv" +def upsert_and_download_global_descriptors( + input_filepath: str, + study_global_id: Optional[str], + dewrangle_study_id: Optional[str], + skip_unavailable_descriptors: Optional[bool] = True, + descriptors: Optional[GlobalIdDescriptorOptions] = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value, # noqa + output_dir: Optional[str] = None, + output_filepath: Optional[str] = None, +) -> str: + """ + Send request to upsert global descriptors and download created/updated + global descriptors and ID from Dewrangle + + Args: + See upsert_global_descriptors and + d3b_api_client_cli.dewrangle.rest.download_global_descriptors + + Options: + See upsert_global_descriptors and + d3b_api_client_cli.dewrangle.rest.download_global_descriptors + + Returns: + filepath: path to downloaded global ID descriptors + """ + if not output_dir: + output_dir = os.path.join(ROOT_DATA_DIR) + os.makedirs(output_dir, exist_ok=True) + + result = upsert_global_descriptors( + 
input_filepath, + study_global_id=study_global_id, + dewrangle_study_id=dewrangle_study_id, + skip_unavailable_descriptors=skip_unavailable_descriptors, + ) + + job_id = result["job"]["id"] + dewrangle_study_id = result["study_id"] + + filepath = download_global_descriptors( + dewrangle_study_id=dewrangle_study_id, + job_id=job_id, + descriptors=descriptors, + filepath=output_filepath, + output_dir=output_dir, + ) + + return filepath + + def upsert_global_descriptors( filepath: str, study_global_id: Optional[str], @@ -28,7 +82,7 @@ def upsert_global_descriptors( skip_unavailable_descriptors: Optional[bool] = True, ): """ - Upsert global IDs to Dewrangle + Upsert global descriptors to Dewrangle This happens in two steps: 1. Upload the global descriptor csv file to the study file endpoint @@ -93,6 +147,8 @@ def upsert_global_descriptors( result["study_id"] = study["id"] logger.info( - "✅ Completed request to upsert global IDs. Job ID: %s", job_id) + "✅ Completed request to upsert global descriptors. 
Job ID: %s", + job_id + ) return result From 3939f7e180b599decb065e6401e72ebefa351cf0 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 11 Feb 2025 15:40:48 -0500 Subject: [PATCH 09/23] :seedling: Generate fake descriptors file for testing --- d3b_api_client_cli/cli/__init__.py | 12 +++ d3b_api_client_cli/cli/dewrangle/__init__.py | 1 + d3b_api_client_cli/cli/faker/__init__.py | 5 + .../cli/faker/global_id_commands.py | 83 ++++++++++++++++ d3b_api_client_cli/config/__init__.py | 25 +++++ d3b_api_client_cli/faker/__init__.py | 4 + d3b_api_client_cli/faker/global_id.py | 98 +++++++++++++++++++ 7 files changed, 228 insertions(+) create mode 100644 d3b_api_client_cli/cli/faker/__init__.py create mode 100644 d3b_api_client_cli/cli/faker/global_id_commands.py create mode 100644 d3b_api_client_cli/faker/__init__.py create mode 100644 d3b_api_client_cli/faker/global_id.py diff --git a/d3b_api_client_cli/cli/__init__.py b/d3b_api_client_cli/cli/__init__.py index bc2d419..421c467 100644 --- a/d3b_api_client_cli/cli/__init__.py +++ b/d3b_api_client_cli/cli/__init__.py @@ -7,6 +7,14 @@ import click from d3b_api_client_cli.cli.dewrangle import * from d3b_api_client_cli.cli.postgres import * +from d3b_api_client_cli.cli.faker import * + + +@click.group() +def faker(): + """ + Group of lower level CLI commands related to generating fake data + """ @click.group() @@ -35,6 +43,9 @@ def main(): """ +# Fake data commands +faker.add_command(generate_global_id_file) + # Postgres API commands postgres.add_command(save_file_to_db) @@ -61,3 +72,4 @@ def main(): # Add command groups to the root CLI main.add_command(dewrangle) main.add_command(postgres) +main.add_command(faker) diff --git a/d3b_api_client_cli/cli/dewrangle/__init__.py b/d3b_api_client_cli/cli/dewrangle/__init__.py index 4921f23..2c7febd 100644 --- a/d3b_api_client_cli/cli/dewrangle/__init__.py +++ b/d3b_api_client_cli/cli/dewrangle/__init__.py @@ -10,3 +10,4 @@ from d3b_api_client_cli.cli.dewrangle.volume_commands 
import * from d3b_api_client_cli.cli.dewrangle.job_commands import * from d3b_api_client_cli.cli.dewrangle.billing_group_commands import * +from d3b_api_client_cli.cli.dewrangle.global_id_commands import * diff --git a/d3b_api_client_cli/cli/faker/__init__.py b/d3b_api_client_cli/cli/faker/__init__.py new file mode 100644 index 0000000..0e35a7a --- /dev/null +++ b/d3b_api_client_cli/cli/faker/__init__.py @@ -0,0 +1,5 @@ +""" +Package containing commands for fake data generation +""" + +from d3b_api_client_cli.cli.faker.global_id_commands import * diff --git a/d3b_api_client_cli/cli/faker/global_id_commands.py b/d3b_api_client_cli/cli/faker/global_id_commands.py new file mode 100644 index 0000000..5be0a19 --- /dev/null +++ b/d3b_api_client_cli/cli/faker/global_id_commands.py @@ -0,0 +1,83 @@ +""" +Commands to generate fake global ID descriptors +""" +import os +import logging +import click + +from d3b_api_client_cli.config import ( + log, FHIR_RESOURCE_TYPES, FhirResourceType +) +from d3b_api_client_cli.faker.global_id import ( + generate_global_id_file as _generate_global_id_file +) + +logger = logging.getLogger(__name__) + +DEFAULT_FHIR_RESOURCE_TYPE: FhirResourceType = FHIR_RESOURCE_TYPES["DocumentReference"] + + +@click.command() +@click.option( + "--output-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="Where the output file will be written" +) +@click.option( + "--fhir-resource-type", + default=DEFAULT_FHIR_RESOURCE_TYPE.resource_type, + type=click.Choice(rt for rt in FHIR_RESOURCE_TYPES.keys()), + help="What the fhirResourceType column will be populated with" +) +@click.option( + "--with-global-ids", + default=True, + is_flag=True, + help="Whether or not to generate a globalId column" +) +@click.option( + "--starting-index", + type=int, + default=0, + help="Determines what index the sequential descriptors start at" +) +@click.option( + "--total-rows", + type=int, + default=10, + help="Total number of rows to generate" +) +def 
generate_global_id_file( + total_rows, starting_index, with_global_ids, fhir_resource_type, output_dir +): + """ + Generate a csv file with global IDs and descriptors. + + \b + Descriptors are formatted like: + \b + - <2 char prefix for resource type>-000 + - Example: For a DocumentReference FHIR resource type the + descriptors would look like `dr-1000` + + \b + When starting_index is supplied it will be added to the row index. + \b + - Example: row 0, starting_index=255, descriptor = dr-25500 + - Example: row 1, starting_index=255, descriptor = dr-25600 + + \b + The starting_index allows a developer to have some control over the + descriptors that get generated so they can test create, replace, and + append functions for global IDs. + """ + + log.init_logger() + + return _generate_global_id_file( + fhir_resource_type, + total_rows=total_rows, + starting_index=starting_index, + with_global_ids=with_global_ids, + output_dir=output_dir + ) diff --git a/d3b_api_client_cli/config/__init__.py b/d3b_api_client_cli/config/__init__.py index 74e7a76..8622c12 100644 --- a/d3b_api_client_cli/config/__init__.py +++ b/d3b_api_client_cli/config/__init__.py @@ -3,12 +3,14 @@ """ import os +from dataclasses import dataclass from dotenv import find_dotenv, load_dotenv # File paths and directories ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname((__file__)))) ROOT_DATA_DIR = os.path.join(ROOT_DIR, "data") +ROOT_FAKE_DATA_DIR = os.path.join(ROOT_DATA_DIR, "fake_data") LOG_DIR = os.path.join(ROOT_DATA_DIR, "logs") DOTENV_PATH = find_dotenv() @@ -27,6 +29,23 @@ DB_USER_PW = os.environ.get("DB_USER_PW") +@dataclass +class FhirResourceType: + """ + Wrapper class to define a FHIR resource type along with a global ID + prefix + """ + resource_type: str + id_prefix: str + + +FHIR_RESOURCE_TYPES: dict = { + resource_type: FhirResourceType(resource_type, prefix) + for resource_type, prefix in + [("DocumentReference", "dr")] +} + + class SECRETS: """ Used in logger initialization to 
obfuscate sensitive env variables @@ -69,6 +88,12 @@ def check_dewrangle_http_config(): "credential_type": "AWS", "billing_group_id": os.environ.get("CAVATICA_BILLING_GROUP_ID"), }, + "faker": { + "global_id": { + "fhir_resource_types": FHIR_RESOURCE_TYPES + } + + }, "aws": { "region": os.environ.get("AWS_DEFAULT_REGION") or "us-east-1", "s3": { diff --git a/d3b_api_client_cli/faker/__init__.py b/d3b_api_client_cli/faker/__init__.py new file mode 100644 index 0000000..098f26e --- /dev/null +++ b/d3b_api_client_cli/faker/__init__.py @@ -0,0 +1,4 @@ +""" +Package dedicated to generating fake data needed for development and testing +""" + diff --git a/d3b_api_client_cli/faker/global_id.py b/d3b_api_client_cli/faker/global_id.py new file mode 100644 index 0000000..2420c24 --- /dev/null +++ b/d3b_api_client_cli/faker/global_id.py @@ -0,0 +1,98 @@ +""" +Generate files of global ID descriptors for testing and development +""" + +import os +from typing import Optional +from pprint import pformat +import logging +import random + +import pandas + +from d3b_api_client_cli.config import ( + config, FhirResourceType, ROOT_FAKE_DATA_DIR +) + +FHIR_RESOURCE_TYPES: dict[ + str, + FhirResourceType +] = config["faker"]["global_id"]["fhir_resource_types"] +DEFAULT_FHIR_RESOURCE_TYPE: str = "DocumentReference" + +logger = logging.getLogger(__name__) + + +def generate_global_id_file( + fhir_resource_type: Optional[str] = DEFAULT_FHIR_RESOURCE_TYPE, + with_global_ids: Optional[bool] = True, + total_rows: Optional[int] = 10, + starting_index: Optional[int] = 0, + output_dir: Optional[str] = None +) -> str: + """ + Generate a csv file with global IDs and descriptors. + + Descriptors are formatted like: + + - <2 char prefix for resource type>-000 + - Example: dr-1000 + + When starting_index is supplied it will be added to the row index. 
+ + - Example: row 0, starting_index=255, descriptor = dr-25500 + - Example: row 1, starting_index=255, descriptor = dr-25600 + + The starting_index allows a developer to have some control over the + descriptors that get generated so they can test create, replace, and + append functions for descriptors. + + Options: + - fhir_resource_type: the FHIR resource type and global ID prefix + to populate the file with + + - with_global_ids: Whether or not to include a column for global IDs + if global IDs are not included and this file is used in + upsert_global_descriptors, then new global IDs will be created by + Dewrangle + + - total_rows: Number of rows to generate + + - starting_index: Used in generating sequential descriptors. + + Returns: + Path to file + """ + logger.info( + "🏭 Generating %s rows for fake global ID descriptors file", + total_rows + ) + if not output_dir: + output_dir = ROOT_FAKE_DATA_DIR + os.makedirs(output_dir, exist_ok=True) + + fhir_resource_type = FHIR_RESOURCE_TYPES.get(fhir_resource_type) + + data = [] + for i in range(total_rows): + index = i + starting_index + row = { + "fhirResourceType": fhir_resource_type.resource_type, + "descriptor": f"{fhir_resource_type.resource_type}-{index}" + } + if with_global_ids: + row["globalId"] = f"{fhir_resource_type.id_prefix}-{index}000" + data.append(row) + + logger.info("Wrote %s to file", pformat(row)) + + df = pandas.DataFrame(data) + + filepath = os.path.join(output_dir, "fake_global_descriptors.csv") + df.to_csv(filepath, index=False) + + logger.info( + "✅ Completed writing global ID descriptors to %s", filepath + ) + + return filepath From 07ca54d4551a6223e28930211ce4d3a53b5988e0 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 11:43:32 -0500 Subject: [PATCH 10/23] :bug: Add missing CLI cmd declarations --- d3b_api_client_cli/cli/__init__.py | 3 +++ d3b_api_client_cli/cli/dewrangle/global_id_commands.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git 
a/d3b_api_client_cli/cli/__init__.py b/d3b_api_client_cli/cli/__init__.py index 421c467..cf8e795 100644 --- a/d3b_api_client_cli/cli/__init__.py +++ b/d3b_api_client_cli/cli/__init__.py @@ -68,6 +68,9 @@ def main(): dewrangle.add_command(create_billing_group) dewrangle.add_command(delete_billing_group) dewrangle.add_command(read_billing_groups) +dewrangle.add_command(upsert_global_descriptors) +dewrangle.add_command(download_global_descriptors) +dewrangle.add_command(upsert_and_download_global_descriptors) # Add command groups to the root CLI main.add_command(dewrangle) diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py index 150f99d..9da4f4b 100644 --- a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py +++ b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -8,7 +8,7 @@ import click from d3b_api_client_cli.config import log -from d3b_api_client_cli.dewrangle.rest.files import GlobalIdDescriptorOptions +from d3b_api_client_cli.dewrangle.global_id import GlobalIdDescriptorOptions from d3b_api_client_cli.dewrangle.global_id import ( upsert_global_descriptors as _upsert_global_descriptors, download_global_descriptors as _download_global_descriptors, From 269a06720767c9703aca2a6626b0fae3db044859 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 13:08:50 -0500 Subject: [PATCH 11/23] :truck: Mv global id specific funcs into global_id module --- d3b_api_client_cli/dewrangle/global_id.py | 98 +++++++++++++++++++++- d3b_api_client_cli/dewrangle/rest/files.py | 92 ++------------------ 2 files changed, 100 insertions(+), 90 deletions(-) diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index 54ec4a1..7d41a52 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -2,11 +2,14 @@ Dewrangle functions to create, update, remove global descriptors in Dewrangle """ +from enum 
import Enum from typing import Optional +from pprint import pformat import logging import os from d3b_api_client_cli.dewrangle.graphql import study as study_api +from d3b_api_client_cli.dewrangle.rest.files import download_file from d3b_api_client_cli.config import ( config, @@ -14,8 +17,6 @@ ) from d3b_api_client_cli.dewrangle.rest import ( upload_study_file, - download_global_descriptors, - GlobalIdDescriptorOptions ) from d3b_api_client_cli.utils import timestamp @@ -26,6 +27,14 @@ DEFAULT_FILENAME = f"dewrangle-file-{timestamp()}.csv" +class GlobalIdDescriptorOptions(Enum): + """ + Used in download_global_descriptors + """ + DOWNLOAD_ALL_DESC = "all" + DOWNLOAD_MOST_RECENT = "most-recent" + + def upsert_and_download_global_descriptors( input_filepath: str, study_global_id: Optional[str], @@ -133,7 +142,7 @@ def upsert_global_descriptors( url = f"{base_url}/{endpoint}" logger.info("🛸 POST global IDs file %s to Dewrangle %s", filepath, url) - result = upload_study_file(dewrangle_study_id, filepath) + result = upload_study_file(dewrangle_study_id, filepath=filepath) study_file_id = result["id"] # Trigger global descriptor upsert mutation @@ -152,3 +161,86 @@ def upsert_global_descriptors( ) return result + + +def download_global_descriptors( + dewrangle_study_id: Optional[str] = None, + study_global_id: Optional[str] = None, + job_id: Optional[str] = None, + descriptors: Optional[GlobalIdDescriptorOptions] = None, # noqa + filepath: Optional[str] = None, + output_dir: Optional[str] = None, +) -> str: + """ + Download study's global IDs from Dewrangle + + Args: + - dewrangle_study_id: GraphQL ID of study in Dewrangle + - filepath: GraphQL ID of study in Dewrangle + Options: + - job_id: The job ID returned from the upsert_global_descriptors + method. If this is provided, only global IDs from that + job will be returned. + + - descriptors: A query parameter that determines how many descriptors + will be returned for the global ID. 
+ + If set to "all" return all descriptors associated + with the global ID + + If set to "most-recent" return the most recent + descriptor associated with the global ID + + - filepath: If filepath is provided, download content to that filepath + + - output_dir: If output_dir is provided, get filename from + Content-Disposition header and download the file to the + output directory with that filename + """ + if dewrangle_study_id: + study = study_api.read_study(dewrangle_study_id) + else: + study = study_api.find_study(study_global_id) + + if not study: + raise ValueError( + f"❌ Study " + f"{study_global_id if study_global_id else dewrangle_study_id}" + " does not exist in Dewrangle. Aborting" + ) + + study_global_id = study["globalId"] + dewrangle_study_id = study["id"] + + if not descriptors: + descriptors = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value + + base_url = config["dewrangle"]["base_url"] + endpoint_template = config["dewrangle"]["endpoints"]["rest"]["global_id"] + endpoint = endpoint_template.format(dewrangle_study_id=dewrangle_study_id) + url = f"{base_url}/{endpoint}" + + params = {} + if job_id: + params.update({"job": job_id}) + if descriptors: + params.update({"descriptors": descriptors}) + + logger.info( + "🛸 Start download of global IDs for study %s from Dewrangle: %s" + " Params: %s", + study_global_id, + url, + pformat(params) + ) + + filepath = download_file( + url, + output_dir=output_dir, + filepath=filepath, + params=params + ) + + logger.info("✅ Completed download of global IDs: %s", filepath) + + return filepath diff --git a/d3b_api_client_cli/dewrangle/rest/files.py b/d3b_api_client_cli/dewrangle/rest/files.py index 0866acd..ed16534 100644 --- a/d3b_api_client_cli/dewrangle/rest/files.py +++ b/d3b_api_client_cli/dewrangle/rest/files.py @@ -3,13 +3,11 @@ """ from typing import Optional -from enum import Enum -from pprint import pformat +from pprint import pformat, pprint import logging import os import cgi -from 
d3b_api_client_cli.dewrangle.graphql import study as study_api from d3b_api_client_cli.config import ( DEWRANGLE_DEV_PAT, @@ -26,14 +24,6 @@ DEFAULT_FILENAME = f"dewrangle-file-{timestamp()}.csv" -class GlobalIdDescriptorOptions(Enum): - """ - Used in download_global_descriptors - """ - DOWNLOAD_ALL_DESC = "all" - DOWNLOAD_MOST_RECENT = "most-recent" - - def _filename_from_headers(headers: dict) -> str: """ Helper to get the filename from the Content-Disposition @@ -45,20 +35,20 @@ def _filename_from_headers(headers: dict) -> str: def upload_file( url: str, - filepath: Optional[str] = None, + filepath: str, params: Optional[dict] = None ): """ Upload a file to Dewrangle """ logger.info("🛸 Starting upload of %s to %s", filepath, url) - with open(filepath, "rb") as jsonfile: + with open(filepath, "rb") as file_to_upload: headers = {"x-api-key": DEWRANGLE_DEV_PAT} resp = send_request( "post", url, headers=headers, - data=jsonfile, + data=file_to_upload, params=params, # Set timeout to infinity so that uploads don't timeout timeout=-1, @@ -130,79 +120,7 @@ def upload_study_file(dewrangle_study_id: str, filepath: str): ) url = f"{base_url}/{endpoint}" - return upload_file(url) - - -def download_global_descriptors( - dewrangle_study_id: str, - job_id: Optional[str] = None, - descriptors: Optional[GlobalIdDescriptorOptions] = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value, # noqa - filepath: Optional[str] = None, - output_dir: Optional[str] = None, -) -> str: - """ - Download study's global IDs from Dewrangle - - Args: - - dewrangle_study_id: GraphQL ID of study in Dewrangle - - filepath: GraphQL ID of study in Dewrangle - Options: - - job_id: The job ID returned from the upsert_global_descriptors - method. If this is provided, only global IDs from that - job will be returned. - - - descriptors: A query parameter that determines how many descriptors - will be returned for the global ID. 
- - If set to "all" return all descriptors associated - with the global ID - - If set to "most-recent" return the most recent - descriptor associated with the global ID - - - filepath: If filepath is provided, download content to that filepath - - - output_dir: If output_dir is provided, get filename from - Content-Disposition header and download the file to the - output directory with that filename - """ - study = study_api.read_study(dewrangle_study_id) - - if not study: - raise ValueError( - f"❌ Study {dewrangle_study_id}" - " does not exist in Dewrangle. Aborting" - ) - - study_global_id = study["globalId"] - - logger.info( - "🛸 Start downloading global IDs for study %s from Dewrangle ...", - study_global_id - ) - - filepath = os.path.abspath(filepath) - base_url = config["dewrangle"]["base_url"] - endpoint_template = config["dewrangle"]["endpoints"]["rest"]["global_id"] - endpoint = endpoint_template.format(dewrangle_study_id=dewrangle_study_id) - url = f"{base_url}/{endpoint}" - - params = {} - if job_id: - params.update({"job": job_id}) - if descriptors: - params.update({"descriptors": descriptors}) - - filepath = download_file( - url, - output_dir=output_dir, - filepath=filepath, - params=params - ) - - logger.info("✅ Completed download of global IDs: %s", filepath) - - return filepath + return upload_file(url, filepath) def download_job_errors( From 606141e323534e161efeb1cdb95a82a53bc02b2b Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 13:09:21 -0500 Subject: [PATCH 12/23] :bug: Fix bugs w global id cmd defs --- d3b_api_client_cli/cli/dewrangle/global_id_commands.py | 6 ++++-- d3b_api_client_cli/cli/faker/global_id_commands.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py index 9da4f4b..c578ccc 100644 --- a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py +++ 
b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -185,8 +185,10 @@ def download_global_descriptors( "the study's GraphQL ID in Dewrangle" ) - return download_global_descriptors( - study_id, + + return _download_global_descriptors( + dewrangle_study_id=study_id, + study_global_id=study_global_id, filepath=filepath, job_id=job_id, descriptors=descriptors, diff --git a/d3b_api_client_cli/cli/faker/global_id_commands.py b/d3b_api_client_cli/cli/faker/global_id_commands.py index 5be0a19..b7cb603 100644 --- a/d3b_api_client_cli/cli/faker/global_id_commands.py +++ b/d3b_api_client_cli/cli/faker/global_id_commands.py @@ -31,7 +31,6 @@ ) @click.option( "--with-global-ids", - default=True, is_flag=True, help="Whether or not to generate a globalId column" ) From 2f316167980c56647e8db55a3cdae872e093282f Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 13:09:40 -0500 Subject: [PATCH 13/23] :goal_net: Catch and log global ID upsert errors --- .../dewrangle/graphql/study/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/d3b_api_client_cli/dewrangle/graphql/study/__init__.py b/d3b_api_client_cli/dewrangle/graphql/study/__init__.py index fba6a64..1919600 100644 --- a/d3b_api_client_cli/dewrangle/graphql/study/__init__.py +++ b/d3b_api_client_cli/dewrangle/graphql/study/__init__.py @@ -53,6 +53,20 @@ def upsert_global_descriptors( } resp = exec_query(mutations.upsert_global_descriptors, variables=variables) + key = "globalDescriptorUpsert" + mutation_errors = resp.get(key, {}).get("errors") + job_errors = resp.get(key, {}).get( + "job", {}).get("errors", {}).get("edges", []) + + if mutation_errors or job_errors: + logger.error("❌ %s for study failed", key) + if mutation_errors: + logger.error("❌ Mutation Errors:\n%s", pformat(mutation_errors)) + if job_errors: + logger.error("❌ Job Errors:\n%s", pformat(job_errors)) + else: + logger.info("✅ %s for study succeeded:\n%s", key, pformat(resp)) + return resp From 
fc3e0112192b7d6cb728390ba4db26e66363952b Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 14:09:05 -0500 Subject: [PATCH 14/23] :recycle: Generate fake Dewrangle global IDs --- .../cli/faker/global_id_commands.py | 2 +- d3b_api_client_cli/config/__init__.py | 3 +- d3b_api_client_cli/dewrangle/global_id.py | 4 +++ d3b_api_client_cli/faker/global_id.py | 31 ++++++++++++++++--- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/d3b_api_client_cli/cli/faker/global_id_commands.py b/d3b_api_client_cli/cli/faker/global_id_commands.py index b7cb603..b623cf9 100644 --- a/d3b_api_client_cli/cli/faker/global_id_commands.py +++ b/d3b_api_client_cli/cli/faker/global_id_commands.py @@ -26,7 +26,7 @@ @click.option( "--fhir-resource-type", default=DEFAULT_FHIR_RESOURCE_TYPE.resource_type, - type=click.Choice(rt for rt in FHIR_RESOURCE_TYPES.keys()), + type=click.Choice([rt for rt in FHIR_RESOURCE_TYPES.keys()]), help="What the fhirResourceType column will be populated with" ) @click.option( diff --git a/d3b_api_client_cli/config/__init__.py b/d3b_api_client_cli/config/__init__.py index 8622c12..5907c39 100644 --- a/d3b_api_client_cli/config/__init__.py +++ b/d3b_api_client_cli/config/__init__.py @@ -80,7 +80,8 @@ def check_dewrangle_http_config(): "endpoints": { "graphql": "/api/graphql", "rest": { - "hash_report": "/api/rest/jobs/{job_id}/report/volume-hash", + "study_file": "api/rest/studies/{dewrangle_study_id}/files/{filename}", + "global_id": "api/rest/studies/{dewrangle_study_id}/global-descriptors", "job_errors": "/api/rest/jobs/{job_id}/errors", }, }, diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index 7d41a52..bcf5897 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -177,6 +177,7 @@ def download_global_descriptors( Args: - dewrangle_study_id: GraphQL ID of study in Dewrangle - filepath: GraphQL ID of study in Dewrangle + Options: - 
job_id: The job ID returned from the upsert_global_descriptors method. If this is provided, only global IDs from that @@ -220,9 +221,12 @@ def download_global_descriptors( endpoint = endpoint_template.format(dewrangle_study_id=dewrangle_study_id) url = f"{base_url}/{endpoint}" + # Download global IDs associated with this job only params = {} if job_id: params.update({"job": job_id}) + + # Download all descriptors associated with each affected global id if descriptors: params.update({"descriptors": descriptors}) diff --git a/d3b_api_client_cli/faker/global_id.py b/d3b_api_client_cli/faker/global_id.py index 2420c24..be20640 100644 --- a/d3b_api_client_cli/faker/global_id.py +++ b/d3b_api_client_cli/faker/global_id.py @@ -6,7 +6,6 @@ from typing import Optional from pprint import pformat import logging -import random import pandas @@ -23,6 +22,26 @@ logger = logging.getLogger(__name__) +def _generate_fake_global_id( + prefix: str, starting_index: Optional[int] = 0 +) -> str: + """ + Generate a fake Dewrangle global ID + """ + starting_index = str(starting_index) + + if not starting_index.isdigit(): + raise ValueError("Starting index must contain only digits.") + + if len(starting_index) > 10: + raise ValueError("Starting index cannot be longer than 10 digits.") + + remaining_length = 10 - len(starting_index) + remaining = "0" * remaining_length + + return f"{prefix}-{str(starting_index)}{remaining}" + + def generate_global_id_file( fhir_resource_type: Optional[str] = DEFAULT_FHIR_RESOURCE_TYPE, with_global_ids: Optional[bool] = True, @@ -75,13 +94,17 @@ def generate_global_id_file( data = [] for i in range(total_rows): - index = i + starting_index + rt = fhir_resource_type.resource_type + global_id = _generate_fake_global_id( + fhir_resource_type.id_prefix, starting_index + i + ) + descriptor_suffix = global_id.split("-")[-1] row = { "fhirResourceType": fhir_resource_type.resource_type, - "descriptor": f"{fhir_resource_type.resource_type}-{index}" + "descriptor": 
f"{rt}-{descriptor_suffix}" } if with_global_ids: - row["globalId"] = f"{fhir_resource_type.id_prefix}-{index}000" + row["globalId"] = global_id data.append(row) logger.info("Wrote %s to file", pformat(row)) From 23f58d65e64c1184cbb9bb5d57a9f8de20dd3a4a Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 14:09:29 -0500 Subject: [PATCH 15/23] :white_check_mark: Test global ID file generator --- tests/conftest.py | 4 +- .../integration/dewrangle/test_global_ids.py | 12 +++ tests/unit/faker/test_fake_global_ids.py | 73 +++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 tests/integration/dewrangle/test_global_ids.py create mode 100644 tests/unit/faker/test_fake_global_ids.py diff --git a/tests/conftest.py b/tests/conftest.py index e831178..964f02b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,7 @@ AWS_BUCKET_DATA_TRANSFER_TEST = config["aws"]["s3"]["test_bucket_name"] POSTGRES_DB_IMAGE = "postgres:16-alpine" - +ORG_NAME = "Integration Test d3b-api-client-cli" @pytest.fixture(scope="session") def organization_file(tmp_path_factory): @@ -30,7 +30,7 @@ def organization_file(tmp_path_factory): Write the inputs to create a Dewrangle Organization to file """ - def create_and_write_org(org_name="TestOrg"): + def create_and_write_org(org_name=ORG_NAME): data_dir = tmp_path_factory.mktemp("data") org_filepath = os.path.join(data_dir, "Organization.json") org = { diff --git a/tests/integration/dewrangle/test_global_ids.py b/tests/integration/dewrangle/test_global_ids.py new file mode 100644 index 0000000..6b85ad2 --- /dev/null +++ b/tests/integration/dewrangle/test_global_ids.py @@ -0,0 +1,12 @@ +""" +Test Dewrangle global ID commands +""" + +def test_upsert_global_descriptors(): + pass + +def test_download_global_descriptors(): + pass + +def test_upsert_and_download_global_descriptors(): + pass diff --git a/tests/unit/faker/test_fake_global_ids.py b/tests/unit/faker/test_fake_global_ids.py new file mode 
100644 index 0000000..ac3fff6 --- /dev/null +++ b/tests/unit/faker/test_fake_global_ids.py @@ -0,0 +1,73 @@ +""" +Test generating fake data for global ID commands +""" +import pytest +from click.testing import CliRunner +import pandas + +from d3b_api_client_cli.cli.faker.global_id_commands import * +from d3b_api_client_cli.faker.global_id import ( + generate_global_id_file as _generate_global_id_file, + DEFAULT_FHIR_RESOURCE_TYPE +) + + +@pytest.mark.parametrize( + "kwargs,error_msg", + [ + ( + { + "fhir_resource_type": "foo" + }, "BadParameter" + ) + ] +) +def test_generate_global_ids_errors(kwargs, error_msg): + """ + Test generate_global_id_file errors + """ + runner = CliRunner() + result = runner.invoke( + generate_global_id_file, + ["--fhir-resource-type", "foo"], + standalone_mode=False, + ) + assert result.exit_code == 1 + assert error_msg in str(result.exc_info) + + +def test_generate_global_ids(tmp_path): + """ + Test generate_global_id_file + """ + temp_dir = tmp_path / "output" + temp_dir.mkdir() + + # With global IDs + filepath = _generate_global_id_file( + starting_index=250, + output_dir=temp_dir + ) + df = pandas.read_csv(filepath) + + for c in ["fhirResourceType", "descriptor", "globalId"]: + assert c in df.columns + + assert df["fhirResourceType"].eq(DEFAULT_FHIR_RESOURCE_TYPE).all() + assert df["descriptor"].apply( + lambda d: int(d.split("-")[-1]) + ).ge(250000).all() + + # Without global IDs + filepath = _generate_global_id_file( + output_dir=temp_dir, + with_global_ids=False + ) + df = pandas.read_csv(filepath) + assert "globalId" not in df.columns + assert df["descriptor"].apply( + lambda d: int(d.split("-")[-1]) + ).ge(0).all() + assert df["descriptor"].apply( + lambda d: int(d.split("-")[-1]) + ).le(9000000000).all() From c7301cccdddd467daacf4ae480557512790d17ea Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 16:26:42 -0500 Subject: [PATCH 16/23] :bug: Fix default values for CLI cmds --- 
d3b_api_client_cli/dewrangle/global_id.py | 4 ++-- d3b_api_client_cli/faker/global_id.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index bcf5897..1e8f30b 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -37,8 +37,8 @@ class GlobalIdDescriptorOptions(Enum): def upsert_and_download_global_descriptors( input_filepath: str, - study_global_id: Optional[str], - dewrangle_study_id: Optional[str], + study_global_id: Optional[str] = None, + dewrangle_study_id: Optional[str] = None, skip_unavailable_descriptors: Optional[bool] = True, descriptors: Optional[GlobalIdDescriptorOptions] = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value, # noqa output_dir: Optional[str] = None, diff --git a/d3b_api_client_cli/faker/global_id.py b/d3b_api_client_cli/faker/global_id.py index be20640..7d87006 100644 --- a/d3b_api_client_cli/faker/global_id.py +++ b/d3b_api_client_cli/faker/global_id.py @@ -44,7 +44,7 @@ def _generate_fake_global_id( def generate_global_id_file( fhir_resource_type: Optional[str] = DEFAULT_FHIR_RESOURCE_TYPE, - with_global_ids: Optional[bool] = True, + with_global_ids: Optional[bool] = False, total_rows: Optional[int] = 10, starting_index: Optional[int] = 0, output_dir: Optional[str] = None From 829fb48bda47297d1d107d5f803c36399e67c9cd Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 16:27:01 -0500 Subject: [PATCH 17/23] :white_check_mark: Test global ID upsert/download commands --- tests/conftest.py | 8 +- .../dewrangle/test_crud_organization.py | 3 +- .../integration/dewrangle/test_global_ids.py | 145 +++++++++++++++++- tests/unit/dewrangle/test_global_ids.py | 95 ++++++++++++ tests/unit/faker/test_fake_global_ids.py | 3 +- 5 files changed, 243 insertions(+), 11 deletions(-) create mode 100644 tests/unit/dewrangle/test_global_ids.py diff --git a/tests/conftest.py 
b/tests/conftest.py index 964f02b..77f0fcf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,8 @@ AWS_BUCKET_DATA_TRANSFER_TEST = config["aws"]["s3"]["test_bucket_name"] POSTGRES_DB_IMAGE = "postgres:16-alpine" -ORG_NAME = "Integration Test d3b-api-client-cli" +ORG_NAME = "Integration Tests d3b-api-client-cli" + @pytest.fixture(scope="session") def organization_file(tmp_path_factory): @@ -72,7 +73,7 @@ def dewrangle_org(organization_file): """ Upsert an Organization in Dewrangle for other tests to use """ - fp = organization_file(org_name="Integration Tests") + fp = organization_file() runner = CliRunner() result = runner.invoke(upsert_organization, [fp], standalone_mode=False) assert result.exit_code == 0 @@ -93,7 +94,8 @@ def dewrangle_study(dewrangle_org, study_file): fp = study_file() runner = CliRunner() - result = runner.invoke(upsert_study, [fp, org["id"]], standalone_mode=False) + result = runner.invoke( + upsert_study, [fp, org["id"]], standalone_mode=False) return result.return_value, fp diff --git a/tests/integration/dewrangle/test_crud_organization.py b/tests/integration/dewrangle/test_crud_organization.py index 039425d..6dd76de 100644 --- a/tests/integration/dewrangle/test_crud_organization.py +++ b/tests/integration/dewrangle/test_crud_organization.py @@ -10,6 +10,7 @@ from d3b_api_client_cli.utils import read_json, write_json from d3b_api_client_cli.cli import * from d3b_api_client_cli.dewrangle.graphql import organization +from tests.conftest import ORG_NAME def test_upsert_organization(tmp_path, organization_file): @@ -17,7 +18,7 @@ def test_upsert_organization(tmp_path, organization_file): Test `d3b-clients dewrangle upsert-organization` command """ # Create - fp = organization_file() + fp = organization_file(org_name=ORG_NAME + " 2") organization = read_json(fp) runner = CliRunner() result = runner.invoke(upsert_organization, [fp], standalone_mode=False) diff --git a/tests/integration/dewrangle/test_global_ids.py 
b/tests/integration/dewrangle/test_global_ids.py index 6b85ad2..dcff6c3 100644 --- a/tests/integration/dewrangle/test_global_ids.py +++ b/tests/integration/dewrangle/test_global_ids.py @@ -2,11 +2,144 @@ Test Dewrangle global ID commands """ -def test_upsert_global_descriptors(): - pass +import os -def test_download_global_descriptors(): - pass +import pytest +from click.testing import CliRunner +import pandas -def test_upsert_and_download_global_descriptors(): - pass +from d3b_api_client_cli.cli.dewrangle.global_id_commands import ( + upsert_global_descriptors, + download_global_descriptors, + upsert_and_download_global_descriptors +) +from d3b_api_client_cli.dewrangle.global_id import ( + upsert_global_descriptors as _upsert_global_descriptors +) +from d3b_api_client_cli.faker.global_id import ( + generate_global_id_file, +) + + +@pytest.fixture(scope="session") +def upserted_global_descriptors(dewrangle_study): + """ + Upsert global descriptors + """ + study, fp = dewrangle_study + output_dir = os.path.dirname(fp) + + filepath = generate_global_id_file(output_dir=output_dir) + + runner = CliRunner() + result = runner.invoke( + upsert_global_descriptors, + [filepath, "--study-id", study["id"]], + standalone_mode=False, + ) + assert result.exit_code == 0 + assert result.return_value + + return result.return_value, filepath + + +@pytest.fixture(scope="session") +def downloaded_global_descriptors(upserted_global_descriptors): + """ + Download newly created global descriptors + """ + result, filepath = upserted_global_descriptors + output_dir = os.path.dirname(filepath) + study_id = result["study_id"] + job_id = result["job"]["id"] + + runner = CliRunner() + + result = runner.invoke( + download_global_descriptors, + [ + "--study-id", study_id, "--job-id", job_id, + "--output-dir", output_dir + ], + standalone_mode=False, + ) + assert result.exit_code == 0 + filepath = result.return_value + + return study_id, filepath + + +def 
test_upsert_global_descriptors(upserted_global_descriptors): + """ + Test d3b-clients dewrangle upsert-global-descriptors + """ + upserted_global_descriptors + + +def test_download_global_descriptors(downloaded_global_descriptors): + """ + Test d3b-clients dewrangle download-global-descriptors + """ + _, filepath = downloaded_global_descriptors + df = pandas.read_csv(filepath) + assert df.shape[0] == 10 + + +def test_upsert_and_download_global_descriptors(downloaded_global_descriptors): + """ + Test d3b-clients dewrangle upsert-and-download-global-descriptors + """ + study_id, filepath = downloaded_global_descriptors + output_dir = os.path.dirname(filepath) + + # Update the descriptors + df = pandas.read_csv(filepath) + df = df[[c for c in ("fhirResourceType", "descriptor", "globalId")]] + df["descriptor"] = df["descriptor"].apply( + lambda d: d + "1" + ) + df.to_csv(filepath, index=False) + + runner = CliRunner() + + # Upsert and download the descriptors + result = runner.invoke( + upsert_and_download_global_descriptors, + [ + filepath, + "--study-id", study_id, + "--output-dir", output_dir + ], + standalone_mode=False, + ) + assert result.exit_code == 0 + filepath = result.return_value + + df = pandas.read_csv(filepath) + assert df.shape[0] == 10 + + +def test_download_all_descriptors(dewrangle_study): + """ + Test d3b-clients dewrangle download-global-descriptors for all ids + """ + study, filepath = dewrangle_study + output_dir = os.path.dirname(filepath) + + runner = CliRunner() + result = runner.invoke( + download_global_descriptors, + [ + "--study-id", study["id"], + "--descriptors", "all", + "--output-dir", output_dir + ], + standalone_mode=False, + ) + assert result.exit_code == 0 + filepath = result.return_value + + df = pandas.read_csv(filepath) + + # Should have double the descriptors plus one for the study + assert df.shape[0] == 21 diff --git a/tests/unit/dewrangle/test_global_ids.py b/tests/unit/dewrangle/test_global_ids.py new file mode 100644 index 
0000000..ec95edf --- /dev/null +++ b/tests/unit/dewrangle/test_global_ids.py @@ -0,0 +1,95 @@ +""" +Unit test global ID command +""" + +import pytest +from click.testing import CliRunner + +from d3b_api_client_cli.cli.dewrangle.global_id_commands import ( + upsert_global_descriptors +) +from d3b_api_client_cli.dewrangle.global_id import ( + upsert_global_descriptors as _upsert_global_descriptors, + download_global_descriptors as _download_global_descriptors, +) + + +def test_upsert_global_descriptors_cli_errors(): + """ + Test d3b-clients dewrangle upser-global-descriptor errors + """ + runner = CliRunner() + + result = runner.invoke( + upsert_global_descriptors, + ["global_ids.csv"], + standalone_mode=False, + ) + assert result.exit_code == 1 + assert "BadParameter" in str(result.exc_info) + assert "global ID" in str(result.exc_info) + + +@pytest.mark.parametrize( + "kwargs", + [ + { + "dewrangle_study_id": None, + "study_global_id": "foo" + }, + { + "dewrangle_study_id": "foo", + "study_global_id": None + } + + ] +) +def test_upsert_global_descriptors_no_study(mocker, kwargs): + """ + Test d3b-clients dewrangle upsert-global-descriptors when study + is not found + """ + mock_study_api = mocker.patch( + "d3b_api_client_cli.dewrangle.global_id.study_api" + ) + mock_study_api.read_study.return_value = {} + mock_study_api.find_study.return_value = {} + + with pytest.raises(ValueError) as e: + _upsert_global_descriptors( + "global_ids.csv", + **kwargs + ) + assert "does not exist" in str(e) + + +@pytest.mark.parametrize( + "kwargs", + [ + { + "dewrangle_study_id": None, + "study_global_id": "foo" + }, + { + "dewrangle_study_id": "foo", + "study_global_id": None + } + + ] +) +def test_download_global_descriptors_no_study(mocker, kwargs): + """ + Test d3b-clients dewrangle download-global-descriptors when study + is not found + """ + mock_study_api = mocker.patch( + "d3b_api_client_cli.dewrangle.global_id.study_api" + ) + mock_study_api.read_study.return_value = {} + 
mock_study_api.find_study.return_value = {} + + with pytest.raises(ValueError) as e: + _download_global_descriptors( + **kwargs + ) + assert "does not exist" in str(e) diff --git a/tests/unit/faker/test_fake_global_ids.py b/tests/unit/faker/test_fake_global_ids.py index ac3fff6..80c9383 100644 --- a/tests/unit/faker/test_fake_global_ids.py +++ b/tests/unit/faker/test_fake_global_ids.py @@ -46,7 +46,8 @@ def test_generate_global_ids(tmp_path): # With global IDs filepath = _generate_global_id_file( starting_index=250, - output_dir=temp_dir + output_dir=temp_dir, + with_global_ids=True, ) df = pandas.read_csv(filepath) From 0b498f60daaa3d95733adecc919c927fb917a3c0 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 16:56:27 -0500 Subject: [PATCH 18/23] :bug: Fix broken org tests --- .../integration/dewrangle/test_crud_organization.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/integration/dewrangle/test_crud_organization.py b/tests/integration/dewrangle/test_crud_organization.py index 6dd76de..1a511d0 100644 --- a/tests/integration/dewrangle/test_crud_organization.py +++ b/tests/integration/dewrangle/test_crud_organization.py @@ -12,13 +12,15 @@ from d3b_api_client_cli.dewrangle.graphql import organization from tests.conftest import ORG_NAME +TEST_ORG_NAME = ORG_NAME + " for orgs" + def test_upsert_organization(tmp_path, organization_file): """ Test `d3b-clients dewrangle upsert-organization` command """ # Create - fp = organization_file(org_name=ORG_NAME + " 2") + fp = organization_file(org_name=TEST_ORG_NAME) organization = read_json(fp) runner = CliRunner() result = runner.invoke(upsert_organization, [fp], standalone_mode=False) @@ -72,7 +74,7 @@ def test_delete_organization_safety_check_on(): runner = CliRunner() result = runner.invoke( delete_organization, - ["--dewrangle-org-name", "TestOrg"], + ["--dewrangle-org-name", TEST_ORG_NAME], standalone_mode=False, ) assert result.exit_code == 1 @@ -82,7 +84,7 @@ 
def test_delete_organization_safety_check_on(): found_org = None if orgs: for org in orgs: - if org["name"] == "TestOrg": + if org["name"] == TEST_ORG_NAME: found_org = org break assert found_org @@ -99,7 +101,7 @@ def test_delete_organization_safety_check_off(): dwid = None for org in orgs: - if org["name"] == "TestOrg": + if org["name"] == TEST_ORG_NAME: dwid = org["id"] break @@ -118,4 +120,4 @@ def test_delete_organization_safety_check_off(): orgs = organization.read_organizations() if orgs: - assert all([org["name"] != "TestOrg" for org in orgs]) + assert all([org["name"] != TEST_ORG_NAME for org in orgs]) From 459b9646f86cc9e4d87ad08ca428417cb5c1b456 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 17:53:56 -0500 Subject: [PATCH 19/23] :recycle: Change --descriptors to --download-all --- .../cli/dewrangle/global_id_commands.py | 26 ++++++++----------- d3b_api_client_cli/dewrangle/global_id.py | 18 +++++++------ .../integration/dewrangle/test_global_ids.py | 2 +- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py index c578ccc..e908de0 100644 --- a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py +++ b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -18,6 +18,7 @@ logger = logging.getLogger(__name__) + @click.command() @click.option( "--output-filepath", @@ -33,11 +34,9 @@ "this directory" ) @click.option( - "--descriptors", - type=click.Choice( - item.value for item in GlobalIdDescriptorOptions - ), - help="Which descriptor(s) for each global ID to download. Either download" + "--download-all", + is_flag=True, + help="What descriptor(s) for each global ID to download. 
Either download" " all descriptors for each global ID or just the most recent" ) @click.option( @@ -55,7 +54,7 @@ type=click.Path(exists=False, file_okay=True, dir_okay=False), ) def upsert_and_download_global_descriptors( - input_filepath, study_id, study_global_id, descriptors, output_dir, + input_filepath, study_id, study_global_id, download_all, output_dir, output_filepath ): """ @@ -86,7 +85,7 @@ def upsert_and_download_global_descriptors( input_filepath, study_global_id=study_global_id, dewrangle_study_id=study_id, - descriptors=descriptors, + download_all=download_all, output_dir=output_dir, output_filepath=output_filepath, ) @@ -143,11 +142,9 @@ def upsert_global_descriptors(filepath, study_id, study_global_id): "this directory" ) @click.option( - "--descriptors", - type=click.Choice( - item.value for item in GlobalIdDescriptorOptions - ), - help="Which descriptor(s) for each global ID to download. Either download" + "--download-all", + is_flag=True, + help="What descriptor(s) for each global ID to download. Either download" " all descriptors for each global ID or just the most recent" ) @click.option( @@ -171,7 +168,7 @@ def upsert_global_descriptors(filepath, study_id, study_global_id): "precedence over --output-dir" ) def download_global_descriptors( - filepath, study_id, study_global_id, job_id, descriptors, output_dir + filepath, study_id, study_global_id, job_id, download_all, output_dir ): """ Download global ID descriptors in Dewrangle for a study. 
@@ -185,12 +182,11 @@ def download_global_descriptors( "the study's GraphQL ID in Dewrangle" ) - return _download_global_descriptors( dewrangle_study_id=study_id, study_global_id=study_global_id, filepath=filepath, job_id=job_id, - descriptors=descriptors, + download_all=download_all, output_dir=output_dir, ) diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index 1e8f30b..7c34404 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -40,7 +40,7 @@ def upsert_and_download_global_descriptors( study_global_id: Optional[str] = None, dewrangle_study_id: Optional[str] = None, skip_unavailable_descriptors: Optional[bool] = True, - descriptors: Optional[GlobalIdDescriptorOptions] = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value, # noqa + download_all: Optional[bool] = True, output_dir: Optional[str] = None, output_filepath: Optional[str] = None, ) -> str: @@ -76,7 +76,7 @@ def upsert_and_download_global_descriptors( filepath = download_global_descriptors( dewrangle_study_id=dewrangle_study_id, job_id=job_id, - descriptors=descriptors, + download_all=download_all, filepath=output_filepath, output_dir=output_dir, ) @@ -167,7 +167,7 @@ def download_global_descriptors( dewrangle_study_id: Optional[str] = None, study_global_id: Optional[str] = None, job_id: Optional[str] = None, - descriptors: Optional[GlobalIdDescriptorOptions] = None, # noqa + download_all: Optional[bool] = True, filepath: Optional[str] = None, output_dir: Optional[str] = None, ) -> str: @@ -183,13 +183,13 @@ def download_global_descriptors( method. If this is provided, only global IDs from that job will be returned. - - descriptors: A query parameter that determines how many descriptors - will be returned for the global ID. + - download_all: Determines how many descriptors + will be returned for the global ID. 
- If set to "all" return all descriptors associated + If True, return all descriptors associated with the global ID - If set to "most-recent" return the most recent + If False, return the most recent descriptor associated with the global ID - filepath: If filepath is provided, download content to that filepath @@ -213,8 +213,10 @@ def download_global_descriptors( study_global_id = study["globalId"] dewrangle_study_id = study["id"] - if not descriptors: + if download_all: descriptors = GlobalIdDescriptorOptions.DOWNLOAD_ALL_DESC.value + else: + descriptors = GlobalIdDescriptorOptions.DOWNLOAD_MOST_RECENT.value base_url = config["dewrangle"]["base_url"] endpoint_template = config["dewrangle"]["endpoints"]["rest"]["global_id"] diff --git a/tests/integration/dewrangle/test_global_ids.py b/tests/integration/dewrangle/test_global_ids.py index dcff6c3..bfade7f 100644 --- a/tests/integration/dewrangle/test_global_ids.py +++ b/tests/integration/dewrangle/test_global_ids.py @@ -131,7 +131,7 @@ def test_download_all_descriptors(dewrangle_study): download_global_descriptors, [ "--study-id", study["id"], - "--descriptors", "all", + "--download-all", "--output-dir", output_dir ], standalone_mode=False, From b1e9b9f85f7da0eb9174ba1aed167ae4d064e682 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 18:49:37 -0500 Subject: [PATCH 20/23] :sparkles: New CLI cmd to upsert,download 1 descriptor --- d3b_api_client_cli/cli/__init__.py | 1 + .../cli/dewrangle/global_id_commands.py | 92 ++++++++++++++++++- d3b_api_client_cli/dewrangle/global_id.py | 67 +++++++++++++- 3 files changed, 158 insertions(+), 2 deletions(-) diff --git a/d3b_api_client_cli/cli/__init__.py b/d3b_api_client_cli/cli/__init__.py index cf8e795..52acf25 100644 --- a/d3b_api_client_cli/cli/__init__.py +++ b/d3b_api_client_cli/cli/__init__.py @@ -71,6 +71,7 @@ def main(): dewrangle.add_command(upsert_global_descriptors) dewrangle.add_command(download_global_descriptors) 
dewrangle.add_command(upsert_and_download_global_descriptors) +dewrangle.add_command(upsert_and_download_global_descriptor) # Add command groups to the root CLI main.add_command(dewrangle) diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py index e908de0..4d9a702 100644 --- a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py +++ b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -7,17 +7,107 @@ import logging import click -from d3b_api_client_cli.config import log +from d3b_api_client_cli.config import log, FHIR_RESOURCE_TYPES from d3b_api_client_cli.dewrangle.global_id import GlobalIdDescriptorOptions from d3b_api_client_cli.dewrangle.global_id import ( upsert_global_descriptors as _upsert_global_descriptors, download_global_descriptors as _download_global_descriptors, upsert_and_download_global_descriptors as _upsert_and_download_global_descriptors, + upsert_and_download_global_descriptor as _upsert_and_download_global_descriptor, ) logger = logging.getLogger(__name__) +@click.command() +@click.option( + "--output-filepath", + type=click.Path(exists=False, file_okay=True, dir_okay=False), + help="If provided, download the file to this path. This takes " + "precedence over the --output-dir option" +) +@click.option( + "--output-dir", + default=os.getcwd(), + type=click.Path(exists=True, file_okay=False, dir_okay=True), + help="If provided, download the file with the default file name into " + "this directory" +) +@click.option( + "--download-all", + is_flag=True, + help="What descriptor(s) for each global ID to download. Either download" + " all descriptors for each global ID or just the most recent" +) +@click.option( + "--study-global-id", + help="The global ID of the study in Dewrangle. You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.option( + "--study-id", + help="The GraphQL ID of the study in Dewrangle. 
You must provide either " + "the global ID of the study OR the GraphQL ID of the study but not both" +) +@click.option( + "--global-id", + help="Global ID associated with this descriptor." + " If this is provided, and the descriptor is new, then Dewrangle" + " will append the descriptor to this global ID's descriptor list", +) +@click.option( + "--fhir-resource-type", + type=click.Choice([rt for rt in FHIR_RESOURCE_TYPES.keys()]), + required=True, +) +@click.option( + "--descriptor", + required=True, +) +def upsert_and_download_global_descriptor( + descriptor, + fhir_resource_type, + global_id, + study_id, + study_global_id, + download_all, + output_dir, + output_filepath +): + """ + Send request to upsert one global ID descriptor in Dewrangle and + download the resulting global ID descriptors. + + In order to create new global IDs provide: + descriptor, fhir-resource-type + + In order to update existing global IDs: + descriptor, fhir-resource-type, global-id + + \b + Arguments: + \b + input_filepath - Path to the file with global IDs and descriptors + """ + + log.init_logger() + + if (not study_id) and (not study_global_id): + raise click.BadParameter( + "❌ You must provide either the study's global ID in Dewrangle OR " + "the study's GraphQL ID in Dewrangle" + ) + return _upsert_and_download_global_descriptor( + descriptor, + fhir_resource_type, + global_id=global_id, + study_global_id=study_global_id, + dewrangle_study_id=study_id, + download_all=download_all, + output_dir=output_dir, + output_filepath=output_filepath, + ) + @click.command() @click.option( diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index 7c34404..1a4c55d 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -8,12 +8,15 @@ import logging import os +import pandas + from d3b_api_client_cli.dewrangle.graphql import study as study_api from d3b_api_client_cli.dewrangle.rest.files import 
download_file from d3b_api_client_cli.config import ( config, - ROOT_DATA_DIR + ROOT_DATA_DIR, + FhirResourceType ) from d3b_api_client_cli.dewrangle.rest import ( upload_study_file, @@ -35,6 +38,68 @@ class GlobalIdDescriptorOptions(Enum): DOWNLOAD_MOST_RECENT = "most-recent" +def upsert_and_download_global_descriptor( + descriptor: str, + fhir_resource_type: FhirResourceType, + global_id: Optional[str] = None, + study_global_id: Optional[str] = None, + dewrangle_study_id: Optional[str] = None, + skip_unavailable_descriptors: Optional[bool] = True, + download_all: Optional[bool] = True, + output_dir: Optional[str] = None, + output_filepath: Optional[str] = None, +) -> str: + """ + Upsert a single global descriptor and download created/updated + global descriptors and ID from Dewrangle + + Args: + See upsert_global_descriptors and + d3b_api_client_cli.dewrangle.rest.download_global_descriptors + + Options: + See upsert_global_descriptors and + d3b_api_client_cli.dewrangle.rest.download_global_descriptors + + Returns: + filepath: path to downloaded global ID descriptors + """ + if not output_dir: + output_dir = os.path.join(ROOT_DATA_DIR) + os.makedirs(output_dir, exist_ok=True) + + if study_global_id: + s_id = study_global_id + else: + s_id = dewrangle_study_id + + filepath = os.path.join(output_dir, f"global-descriptors-{s_id}.csv") + + logger.info("✏️ Preparing to upsert single global descriptor ...") + logger.info( + "Writing parameters to file %s", filepath + ) + + row = { + "descriptor": descriptor, + "fhirResourceType": fhir_resource_type + } + if global_id: + row["globalId"] = global_id + + pandas.DataFrame([row]).to_csv(filepath, index=False) + + return upsert_and_download_global_descriptors( + filepath, + study_global_id=study_global_id, + dewrangle_study_id=dewrangle_study_id, + skip_unavailable_descriptors=skip_unavailable_descriptors, + download_all=download_all, + output_dir=output_dir, + output_filepath=output_filepath, + ) + + def 
upsert_and_download_global_descriptors( input_filepath: str, study_global_id: Optional[str] = None, From e65cc4f1e1aac1de7befa0560a570ad0e1a771cc Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 18:49:53 -0500 Subject: [PATCH 21/23] :white_check_mark: Test single descriptor upsert --- .../integration/dewrangle/test_global_ids.py | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/tests/integration/dewrangle/test_global_ids.py b/tests/integration/dewrangle/test_global_ids.py index bfade7f..97481bd 100644 --- a/tests/integration/dewrangle/test_global_ids.py +++ b/tests/integration/dewrangle/test_global_ids.py @@ -11,7 +11,8 @@ from d3b_api_client_cli.cli.dewrangle.global_id_commands import ( upsert_global_descriptors, download_global_descriptors, - upsert_and_download_global_descriptors + upsert_and_download_global_descriptors, + upsert_and_download_global_descriptor, ) from d3b_api_client_cli.dewrangle.global_id import ( upsert_global_descriptors as _upsert_global_descriptors @@ -143,3 +144,40 @@ def test_download_all_descriptors(dewrangle_study): # Should have double the descriptors plus one for the study assert df.shape[0] == 21 + + +def test_one_upsert_and_download_global_descriptor( + downloaded_global_descriptors +): + """ + Test d3b-clients dewrangle upsert-and-download-global-descriptor + """ + study_id, filepath = downloaded_global_descriptors + output_dir = os.path.dirname(filepath) + + # Get an existing global ID + df = pandas.read_csv(filepath) + row = df.to_dict(orient="records")[0] + + runner = CliRunner() + + # Upsert and download the descriptors + result = runner.invoke( + upsert_and_download_global_descriptor, + [ + "--descriptor", + "foo", + "--fhir-resource-type", + row["fhirResourceType"], + "--global-id", + row["globalId"], + "--study-id", study_id, + "--output-dir", output_dir + ], + standalone_mode=False, + ) + assert result.exit_code == 0 + filepath = result.return_value + + df = 
pandas.read_csv(filepath) + assert df.shape[0] == 1 From 6fc0e880868bec2175b62bc73f81741af3c75f3a Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 12 Feb 2025 18:55:40 -0500 Subject: [PATCH 22/23] :rotating_light: Fix black formatter errors --- d3b_api_client_cli/cli/__init__.py | 2 +- .../cli/dewrangle/global_id_commands.py | 51 ++++++++++--------- .../cli/faker/global_id_commands.py | 37 +++++++------- d3b_api_client_cli/config/__init__.py | 13 ++--- d3b_api_client_cli/dewrangle/global_id.py | 48 +++++++---------- .../dewrangle/graphql/study/__init__.py | 8 +-- .../dewrangle/graphql/study/mutations.py | 1 - d3b_api_client_cli/dewrangle/rest/files.py | 13 ++--- d3b_api_client_cli/faker/__init__.py | 1 - d3b_api_client_cli/faker/global_id.py | 40 +++++++-------- tests/conftest.py | 3 +- .../integration/dewrangle/test_global_ids.py | 36 ++++++------- tests/unit/dewrangle/test_download.py | 4 +- tests/unit/dewrangle/test_global_ids.py | 41 ++++----------- tests/unit/faker/test_fake_global_ids.py | 34 +++++-------- 15 files changed, 142 insertions(+), 190 deletions(-) diff --git a/d3b_api_client_cli/cli/__init__.py b/d3b_api_client_cli/cli/__init__.py index 52acf25..e268080 100644 --- a/d3b_api_client_cli/cli/__init__.py +++ b/d3b_api_client_cli/cli/__init__.py @@ -13,7 +13,7 @@ @click.group() def faker(): """ - Group of lower level CLI commands related to generating fake data + Group of lower level CLI commands related to generating fake data """ diff --git a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py index 4d9a702..654cd90 100644 --- a/d3b_api_client_cli/cli/dewrangle/global_id_commands.py +++ b/d3b_api_client_cli/cli/dewrangle/global_id_commands.py @@ -24,30 +24,30 @@ "--output-filepath", type=click.Path(exists=False, file_okay=True, dir_okay=False), help="If provided, download the file to this path. 
This takes " - "precedence over the --output-dir option" + "precedence over the --output-dir option", ) @click.option( "--output-dir", default=os.getcwd(), type=click.Path(exists=True, file_okay=False, dir_okay=True), help="If provided, download the file with the default file name into " - "this directory" + "this directory", ) @click.option( "--download-all", is_flag=True, help="What descriptor(s) for each global ID to download. Either download" - " all descriptors for each global ID or just the most recent" + " all descriptors for each global ID or just the most recent", ) @click.option( "--study-global-id", help="The global ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.option( "--study-id", help="The GraphQL ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.option( "--global-id", @@ -72,10 +72,10 @@ def upsert_and_download_global_descriptor( study_global_id, download_all, output_dir, - output_filepath + output_filepath, ): """ - Send request to upsert one global ID descriptor in Dewrangle and + Send request to upsert one global ID descriptor in Dewrangle and download the resulting global ID descriptors. In order to create new global IDs provide: @@ -114,41 +114,45 @@ def upsert_and_download_global_descriptor( "--output-filepath", type=click.Path(exists=False, file_okay=True, dir_okay=False), help="If provided, download the file to this path. 
This takes " - "precedence over the --output-dir option" + "precedence over the --output-dir option", ) @click.option( "--output-dir", default=os.getcwd(), type=click.Path(exists=True, file_okay=False, dir_okay=True), help="If provided, download the file with the default file name into " - "this directory" + "this directory", ) @click.option( "--download-all", is_flag=True, help="What descriptor(s) for each global ID to download. Either download" - " all descriptors for each global ID or just the most recent" + " all descriptors for each global ID or just the most recent", ) @click.option( "--study-global-id", help="The global ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.option( "--study-id", help="The GraphQL ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.argument( "input_filepath", type=click.Path(exists=False, file_okay=True, dir_okay=False), ) def upsert_and_download_global_descriptors( - input_filepath, study_id, study_global_id, download_all, output_dir, - output_filepath + input_filepath, + study_id, + study_global_id, + download_all, + output_dir, + output_filepath, ): """ - Send request to upsert global ID descriptors in Dewrangle and + Send request to upsert global ID descriptors in Dewrangle and download the resulting global ID descriptors. In order to create new global IDs provide a CSV file with the columns: @@ -185,12 +189,12 @@ def upsert_and_download_global_descriptors( @click.option( "--study-global-id", help="The global ID of the study in Dewrangle. 
You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.option( "--study-id", help="The GraphQL ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.argument( "filepath", @@ -229,33 +233,32 @@ def upsert_global_descriptors(filepath, study_id, study_global_id): default=os.getcwd(), type=click.Path(exists=True, file_okay=False, dir_okay=True), help="If provided, download the file with the default file name into " - "this directory" + "this directory", ) @click.option( "--download-all", is_flag=True, help="What descriptor(s) for each global ID to download. Either download" - " all descriptors for each global ID or just the most recent" + " all descriptors for each global ID or just the most recent", ) @click.option( - "--job-id", - help="Dewrangle job id from the upsert_global_descriptors cmd" + "--job-id", help="Dewrangle job id from the upsert_global_descriptors cmd" ) @click.option( "--study-global-id", help="The global ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.option( "--study-id", help="The GraphQL ID of the study in Dewrangle. You must provide either " - "the global ID of the study OR the GraphQL ID of the study but not both" + "the global ID of the study OR the GraphQL ID of the study but not both", ) @click.option( "--filepath", type=click.Path(exists=False, file_okay=True, dir_okay=False), help="If provided, download the file to this filepath. 
This takes " - "precedence over --output-dir" + "precedence over --output-dir", ) def download_global_descriptors( filepath, study_id, study_global_id, job_id, download_all, output_dir diff --git a/d3b_api_client_cli/cli/faker/global_id_commands.py b/d3b_api_client_cli/cli/faker/global_id_commands.py index b623cf9..480651a 100644 --- a/d3b_api_client_cli/cli/faker/global_id_commands.py +++ b/d3b_api_client_cli/cli/faker/global_id_commands.py @@ -1,73 +1,74 @@ """ Commands to generate fake global ID descriptors """ + import os import logging import click -from d3b_api_client_cli.config import ( - log, FHIR_RESOURCE_TYPES, FhirResourceType -) +from d3b_api_client_cli.config import log, FHIR_RESOURCE_TYPES, FhirResourceType from d3b_api_client_cli.faker.global_id import ( - generate_global_id_file as _generate_global_id_file + generate_global_id_file as _generate_global_id_file, ) logger = logging.getLogger(__name__) -DEFAULT_FHIR_RESOURCE_TYPE: FhirResourceType = FHIR_RESOURCE_TYPES["DocumentReference"] +DEFAULT_FHIR_RESOURCE_TYPE: FhirResourceType = FHIR_RESOURCE_TYPES[ + "DocumentReference" +] @click.command() @click.option( "--output-dir", type=click.Path(exists=True, file_okay=False, dir_okay=True), - help="Where the output file will be written" + help="Where the output file will be written", ) @click.option( "--fhir-resource-type", default=DEFAULT_FHIR_RESOURCE_TYPE.resource_type, type=click.Choice([rt for rt in FHIR_RESOURCE_TYPES.keys()]), - help="What the fhirResourceType column will be populated with" + help="What the fhirResourceType column will be populated with", ) @click.option( "--with-global-ids", is_flag=True, - help="Whether or not to generate a globalId column" + help="Whether or not to generate a globalId column", ) @click.option( "--starting-index", type=int, default=0, - help="Determines what index the sequential descriptors start at" + help="Determines what index the sequential descriptors start at", ) @click.option( "--total-rows", type=int, 
default=10, - help="Total number of rows to generate" + help="Total number of rows to generate", ) def generate_global_id_file( total_rows, starting_index, with_global_ids, fhir_resource_type, output_dir ): """ - Generate a csv file with global IDs and descriptors. + Generate a csv file with global IDs and descriptors. \b - Descriptors are formatted like: + Descriptors are formatted like: \b - <2 char prefix for resource type>-000 - - Example: For a DocumentReference FHIR resource type the + - Example: For a DocumentReference FHIR resource type the descriptors would look like `dr-1000` \b When starting_index is supplied it will be added to the row index. \b - - Example: row 0, starting_index=255, descriptor = dr-25500 - - Example: row 1, starting_index=255, descriptor = dr-25600 + - Example: row 0, starting_index=255, descriptor = dr-25500 + - Example: row 1, starting_index=255, descriptor = dr-25600 \b - The starting_index allows a developer to have some control over the - descriptors that get generated so they can test create, replace, and + The starting_index allows a developer to have some control over the + descriptors that get generated so they can test create, replace, and append functions for global IDs. 
""" @@ -78,5 +79,5 @@ def generate_global_id_file( total_rows=total_rows, starting_index=starting_index, with_global_ids=with_global_ids, - output_dir=output_dir + output_dir=output_dir, ) diff --git a/d3b_api_client_cli/config/__init__.py b/d3b_api_client_cli/config/__init__.py index 5907c39..0c95cbd 100644 --- a/d3b_api_client_cli/config/__init__.py +++ b/d3b_api_client_cli/config/__init__.py @@ -32,17 +32,17 @@ @dataclass class FhirResourceType: """ - Wrapper class to define a FHIR resource type along with a global ID + Wrapper class to define a FHIR resource type along with a global ID prefix """ + resource_type: str id_prefix: str FHIR_RESOURCE_TYPES: dict = { resource_type: FhirResourceType(resource_type, prefix) - for resource_type, prefix in - [("DocumentReference", "dr")] + for resource_type, prefix in [("DocumentReference", "dr")] } @@ -89,12 +89,7 @@ def check_dewrangle_http_config(): "credential_type": "AWS", "billing_group_id": os.environ.get("CAVATICA_BILLING_GROUP_ID"), }, - "faker": { - "global_id": { - "fhir_resource_types": FHIR_RESOURCE_TYPES - } - - }, + "faker": {"global_id": {"fhir_resource_types": FHIR_RESOURCE_TYPES}}, "aws": { "region": os.environ.get("AWS_DEFAULT_REGION") or "us-east-1", "s3": { diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index 1a4c55d..a85696c 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -13,11 +13,7 @@ from d3b_api_client_cli.dewrangle.graphql import study as study_api from d3b_api_client_cli.dewrangle.rest.files import download_file -from d3b_api_client_cli.config import ( - config, - ROOT_DATA_DIR, - FhirResourceType -) +from d3b_api_client_cli.config import config, ROOT_DATA_DIR, FhirResourceType from d3b_api_client_cli.dewrangle.rest import ( upload_study_file, ) @@ -32,8 +28,9 @@ class GlobalIdDescriptorOptions(Enum): """ - Used in download_global_descriptors + Used in download_global_descriptors """ + 
DOWNLOAD_ALL_DESC = "all" DOWNLOAD_MOST_RECENT = "most-recent" @@ -50,7 +47,7 @@ def upsert_and_download_global_descriptor( output_filepath: Optional[str] = None, ) -> str: """ - Upsert a single global descriptor and download created/updated + Upsert a single global descriptor and download created/updated global descriptors and ID from Dewrangle Args: @@ -76,14 +73,9 @@ def upsert_and_download_global_descriptor( filepath = os.path.join(output_dir, f"global-descriptors-{s_id}.csv") logger.info("✏️ Preparing to upsert single global descriptor ...") - logger.info( - "Writing parameters to file %s", filepath - ) + logger.info("Writing parameters to file %s", filepath) - row = { - "descriptor": descriptor, - "fhirResourceType": fhir_resource_type - } + row = {"descriptor": descriptor, "fhirResourceType": fhir_resource_type} if global_id: row["globalId"] = global_id @@ -110,7 +102,7 @@ def upsert_and_download_global_descriptors( output_filepath: Optional[str] = None, ) -> str: """ - Send request to upsert global descriptors and download created/updated + Send request to upsert global descriptors and download created/updated global descriptors and ID from Dewrangle Args: @@ -167,11 +159,11 @@ def upsert_global_descriptors( descriptor already having a global ID assigned will be ignored Options: - - study_global_id - Provide this when you don't know the study's + - study_global_id - Provide this when you don't know the study's GraphQL ID in Dewrangle. 
- study_id - Study GraphQL ID in Dewrangle - You must provide either the study_global_id OR the study_id but not both + You must provide either the study_global_id OR the study_id but not both Raise: ValueError if the study does not exist in Dewrangle @@ -193,7 +185,8 @@ def upsert_global_descriptors( logger.info( "🛸 Upsert global IDs in %s to Dewrangle for study %s", - filepath, study_global_id + filepath, + study_global_id, ) filepath = os.path.abspath(filepath) @@ -212,8 +205,7 @@ def upsert_global_descriptors( # Trigger global descriptor upsert mutation resp = study_api.upsert_global_descriptors( - study_file_id, - skip_unavailable_descriptors=skip_unavailable_descriptors + study_file_id, skip_unavailable_descriptors=skip_unavailable_descriptors ) result = resp["globalDescriptorUpsert"] job_id = result["job"]["id"] @@ -221,8 +213,7 @@ def upsert_global_descriptors( result["study_id"] = study["id"] logger.info( - "✅ Completed request to upsert global descriptors. Job ID: %s", - job_id + "✅ Completed request to upsert global descriptors. Job ID: %s", job_id ) return result @@ -244,12 +235,12 @@ def download_global_descriptors( - filepath: GraphQL ID of study in Dewrangle Options: - - job_id: The job ID returned from the upsert_global_descriptors + - job_id: The job ID returned from the upsert_global_descriptors method. If this is provided, only global IDs from that job will be returned. - - download_all: Determines how many descriptors - will be returned for the global ID. + - download_all: Determines how many descriptors + will be returned for the global ID. 
If True, return all descriptors associated with the global ID @@ -302,14 +293,11 @@ def download_global_descriptors( " Params: %s", study_global_id, url, - pformat(params) + pformat(params), ) filepath = download_file( - url, - output_dir=output_dir, - filepath=filepath, - params=params + url, output_dir=output_dir, filepath=filepath, params=params ) logger.info("✅ Completed download of global IDs: %s", filepath) diff --git a/d3b_api_client_cli/dewrangle/graphql/study/__init__.py b/d3b_api_client_cli/dewrangle/graphql/study/__init__.py index 1919600..5ed1bec 100644 --- a/d3b_api_client_cli/dewrangle/graphql/study/__init__.py +++ b/d3b_api_client_cli/dewrangle/graphql/study/__init__.py @@ -33,8 +33,7 @@ def upsert_global_descriptors( - study_file_id: str, - skip_unavailable_descriptors: Optional[bool] = True + study_file_id: str, skip_unavailable_descriptors: Optional[bool] = True ) -> dict: """ Trigger the operation to upsert global descriptors in Dewrangle @@ -55,8 +54,9 @@ def upsert_global_descriptors( key = "globalDescriptorUpsert" mutation_errors = resp.get(key, {}).get("errors") - job_errors = resp.get(key, {}).get( - "job", {}).get("errors", {}).get("edges", []) + job_errors = ( + resp.get(key, {}).get("job", {}).get("errors", {}).get("edges", []) + ) if mutation_errors or job_errors: logger.error("❌ %s for study failed", key) diff --git a/d3b_api_client_cli/dewrangle/graphql/study/mutations.py b/d3b_api_client_cli/dewrangle/graphql/study/mutations.py index a30888b..48c96d3 100644 --- a/d3b_api_client_cli/dewrangle/graphql/study/mutations.py +++ b/d3b_api_client_cli/dewrangle/graphql/study/mutations.py @@ -108,4 +108,3 @@ } """ ) - diff --git a/d3b_api_client_cli/dewrangle/rest/files.py b/d3b_api_client_cli/dewrangle/rest/files.py index ed16534..eb082eb 100644 --- a/d3b_api_client_cli/dewrangle/rest/files.py +++ b/d3b_api_client_cli/dewrangle/rest/files.py @@ -13,7 +13,7 @@ DEWRANGLE_DEV_PAT, config, check_dewrangle_http_config, - ROOT_DATA_DIR + 
ROOT_DATA_DIR, ) from d3b_api_client_cli.utils import send_request, timestamp @@ -33,11 +33,7 @@ def _filename_from_headers(headers: dict) -> str: return params.get("filename") -def upload_file( - url: str, - filepath: str, - params: Optional[dict] = None -): +def upload_file(url: str, filepath: str, params: Optional[dict] = None): """ Upload a file to Dewrangle """ @@ -64,7 +60,7 @@ def download_file( url: str, output_dir: Optional[str] = None, filepath: Optional[str] = None, - params: Optional[dict] = None + params: Optional[dict] = None, ) -> str: """ Download a file from Dewrangle @@ -84,8 +80,7 @@ def download_file( output_dir = os.path.join(ROOT_DATA_DIR) os.makedirs(output_dir, exist_ok=True) - headers = {"x-api-key": DEWRANGLE_DEV_PAT, - "content-type": CSV_CONTENT_TYPE} + headers = {"x-api-key": DEWRANGLE_DEV_PAT, "content-type": CSV_CONTENT_TYPE} resp = send_request( "get", url, diff --git a/d3b_api_client_cli/faker/__init__.py b/d3b_api_client_cli/faker/__init__.py index 098f26e..6a44789 100644 --- a/d3b_api_client_cli/faker/__init__.py +++ b/d3b_api_client_cli/faker/__init__.py @@ -1,4 +1,3 @@ """ Package dedicated to generating fake data needed for development and testing """ - diff --git a/d3b_api_client_cli/faker/global_id.py b/d3b_api_client_cli/faker/global_id.py index 7d87006..b899cfb 100644 --- a/d3b_api_client_cli/faker/global_id.py +++ b/d3b_api_client_cli/faker/global_id.py @@ -10,13 +10,14 @@ import pandas from d3b_api_client_cli.config import ( - config, FhirResourceType, ROOT_FAKE_DATA_DIR + config, + FhirResourceType, + ROOT_FAKE_DATA_DIR, ) -FHIR_RESOURCE_TYPES: dict[ - str, - FhirResourceType -] = config["faker"]["global_id"]["fhir_resource_types"] +FHIR_RESOURCE_TYPES: dict[str, FhirResourceType] = config["faker"]["global_id"][ + "fhir_resource_types" +] DEFAULT_FHIR_RESOURCE_TYPE: str = "DocumentReference" logger = logging.getLogger(__name__) @@ -47,44 +48,43 @@ def generate_global_id_file( with_global_ids: Optional[bool] = False, 
total_rows: Optional[int] = 10, starting_index: Optional[int] = 0, - output_dir: Optional[str] = None + output_dir: Optional[str] = None, ) -> str: """ - Generate a csv file with global IDs and descriptors. + Generate a csv file with global IDs and descriptors. - Descriptors are formatted like: + Descriptors are formatted like: - <2 char prefix for resource type>-000 - Example: dr-1000 When starting_index is supplied it will be added to the row index. - - Example: row 0, starting_index=255, descriptor = dr-25500 - - Example: row 1, starting_index=255, descriptor = dr-25600 + - Example: row 0, starting_index=255, descriptor = dr-25500 + - Example: row 1, starting_index=255, descriptor = dr-25600 - The starting_index allows a developer to have some control over the - descriptors that get generated so they can test create, replace, and + The starting_index allows a developer to have some control over the + descriptors that get generated so they can test create, replace, and append functions for descriptors. Options: - fhir_resource_type: the FHIR resource type and global ID prefix to populate the file with - - with_global_ids: Whether or not to include a column for global IDs + - with_global_ids: Whether or not to include a column for global IDs if global IDs are not included and this file is used in - upsert_global_descriptors, then new global IDs will be created by + upsert_global_descriptors, then new global IDs will be created by Dewrangle - total_rows: Number of rows to generate - - starting_index: Used in generating sequential descriptors. + - starting_index: Used in generating sequential descriptors. 
Returns: Path to file """ logger.info( - "🏭 Generating %s rows for fake global ID descriptors file", - total_rows + "🏭 Generating %s rows for fake global ID descriptors file", total_rows ) if not output_dir: output_dir = ROOT_FAKE_DATA_DIR @@ -101,7 +101,7 @@ def generate_global_id_file( descriptor_suffix = global_id.split("-")[-1] row = { "fhirResourceType": fhir_resource_type.resource_type, - "descriptor": f"{rt}-{descriptor_suffix}" + "descriptor": f"{rt}-{descriptor_suffix}", } if with_global_ids: row["globalId"] = global_id @@ -114,8 +114,6 @@ def generate_global_id_file( filepath = os.path.join(output_dir, "fake_global_descriptors.csv") df.to_csv(filepath, index=False) - logger.info( - "✅ Completed writing global ID descriptors to %s", filepath - ) + logger.info("✅ Completed writing global ID descriptors to %s", filepath) return filepath diff --git a/tests/conftest.py b/tests/conftest.py index 77f0fcf..2f4542e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,8 +94,7 @@ def dewrangle_study(dewrangle_org, study_file): fp = study_file() runner = CliRunner() - result = runner.invoke( - upsert_study, [fp, org["id"]], standalone_mode=False) + result = runner.invoke(upsert_study, [fp, org["id"]], standalone_mode=False) return result.return_value, fp diff --git a/tests/integration/dewrangle/test_global_ids.py b/tests/integration/dewrangle/test_global_ids.py index 97481bd..22b73ab 100644 --- a/tests/integration/dewrangle/test_global_ids.py +++ b/tests/integration/dewrangle/test_global_ids.py @@ -15,7 +15,7 @@ upsert_and_download_global_descriptor, ) from d3b_api_client_cli.dewrangle.global_id import ( - upsert_global_descriptors as _upsert_global_descriptors + upsert_global_descriptors as _upsert_global_descriptors, ) from d3b_api_client_cli.faker.global_id import ( generate_global_id_file, @@ -25,7 +25,7 @@ @pytest.fixture(scope="session") def upserted_global_descriptors(dewrangle_study): """ - Upsert global descriptors + Upsert global descriptors """ 
study, fp = dewrangle_study output_dir = os.path.dirname(fp) @@ -59,8 +59,12 @@ def downloaded_global_descriptors(upserted_global_descriptors): result = runner.invoke( download_global_descriptors, [ - "--study-id", study_id, "--job-id", job_id, - "--output-dir", output_dir + "--study-id", + study_id, + "--job-id", + job_id, + "--output-dir", + output_dir, ], standalone_mode=False, ) @@ -96,9 +100,7 @@ def test_upsert_and_download_global_descriptors(downloaded_global_descriptors): # Update the descriptors df = pandas.read_csv(filepath) df = df[[c for c in ("fhirResourceType", "descriptor", "globalId")]] - df["descriptor"] = df["descriptor"].apply( - lambda d: d + "1" - ) + df["descriptor"] = df["descriptor"].apply(lambda d: d + "1") df.to_csv(filepath, index=False) runner = CliRunner() @@ -106,11 +108,7 @@ def test_upsert_and_download_global_descriptors(downloaded_global_descriptors): # Upsert and download the descriptors result = runner.invoke( upsert_and_download_global_descriptors, - [ - filepath, - "--study-id", study_id, - "--output-dir", output_dir - ], + [filepath, "--study-id", study_id, "--output-dir", output_dir], standalone_mode=False, ) assert result.exit_code == 0 @@ -131,9 +129,11 @@ def test_download_all_descriptors(dewrangle_study): result = runner.invoke( download_global_descriptors, [ - "--study-id", study["id"], + "--study-id", + study["id"], "--download-all", - "--output-dir", output_dir + "--output-dir", + output_dir, ], standalone_mode=False, ) @@ -147,7 +147,7 @@ def test_download_all_descriptors(dewrangle_study): def test_one_upsert_and_download_global_descriptor( - downloaded_global_descriptors + downloaded_global_descriptors, ): """ Test d3b-clients dewrangle upsert-and-download-global-descriptor @@ -171,8 +171,10 @@ def test_one_upsert_and_download_global_descriptor( row["fhirResourceType"], "--global-id", row["globalId"], - "--study-id", study_id, - "--output-dir", output_dir + "--study-id", + study_id, + "--output-dir", + output_dir, ], 
standalone_mode=False, ) diff --git a/tests/unit/dewrangle/test_download.py b/tests/unit/dewrangle/test_download.py index 6005a28..a0e26d5 100644 --- a/tests/unit/dewrangle/test_download.py +++ b/tests/unit/dewrangle/test_download.py @@ -84,9 +84,7 @@ def test_download_job_errors(mocker): ) -@pytest.mark.parametrize( - "download_method", [files.download_job_errors] -) +@pytest.mark.parametrize("download_method", [files.download_job_errors]) @pytest.mark.parametrize( "token,url, expected_msg", [ diff --git a/tests/unit/dewrangle/test_global_ids.py b/tests/unit/dewrangle/test_global_ids.py index ec95edf..e061f03 100644 --- a/tests/unit/dewrangle/test_global_ids.py +++ b/tests/unit/dewrangle/test_global_ids.py @@ -6,7 +6,7 @@ from click.testing import CliRunner from d3b_api_client_cli.cli.dewrangle.global_id_commands import ( - upsert_global_descriptors + upsert_global_descriptors, ) from d3b_api_client_cli.dewrangle.global_id import ( upsert_global_descriptors as _upsert_global_descriptors, @@ -33,20 +33,13 @@ def test_upsert_global_descriptors_cli_errors(): @pytest.mark.parametrize( "kwargs", [ - { - "dewrangle_study_id": None, - "study_global_id": "foo" - }, - { - "dewrangle_study_id": "foo", - "study_global_id": None - } - - ] + {"dewrangle_study_id": None, "study_global_id": "foo"}, + {"dewrangle_study_id": "foo", "study_global_id": None}, + ], ) def test_upsert_global_descriptors_no_study(mocker, kwargs): """ - Test d3b-clients dewrangle upsert-global-descriptors when study + Test d3b-clients dewrangle upsert-global-descriptors when study is not found """ mock_study_api = mocker.patch( @@ -56,30 +49,20 @@ def test_upsert_global_descriptors_no_study(mocker, kwargs): mock_study_api.find_study.return_value = {} with pytest.raises(ValueError) as e: - _upsert_global_descriptors( - "global_ids.csv", - **kwargs - ) + _upsert_global_descriptors("global_ids.csv", **kwargs) assert "does not exist" in str(e) @pytest.mark.parametrize( "kwargs", [ - { - "dewrangle_study_id": 
None, - "study_global_id": "foo" - }, - { - "dewrangle_study_id": "foo", - "study_global_id": None - } - - ] + {"dewrangle_study_id": None, "study_global_id": "foo"}, + {"dewrangle_study_id": "foo", "study_global_id": None}, + ], ) def test_download_global_descriptors_no_study(mocker, kwargs): """ - Test d3b-clients dewrangle download-global-descriptors when study + Test d3b-clients dewrangle download-global-descriptors when study is not found """ mock_study_api = mocker.patch( @@ -89,7 +72,5 @@ def test_download_global_descriptors_no_study(mocker, kwargs): mock_study_api.find_study.return_value = {} with pytest.raises(ValueError) as e: - _download_global_descriptors( - **kwargs - ) + _download_global_descriptors(**kwargs) assert "does not exist" in str(e) diff --git a/tests/unit/faker/test_fake_global_ids.py b/tests/unit/faker/test_fake_global_ids.py index 80c9383..e1594f6 100644 --- a/tests/unit/faker/test_fake_global_ids.py +++ b/tests/unit/faker/test_fake_global_ids.py @@ -1,6 +1,7 @@ """ Test generating fake data for global ID commands """ + import pytest from click.testing import CliRunner import pandas @@ -8,19 +9,12 @@ from d3b_api_client_cli.cli.faker.global_id_commands import * from d3b_api_client_cli.faker.global_id import ( generate_global_id_file as _generate_global_id_file, - DEFAULT_FHIR_RESOURCE_TYPE + DEFAULT_FHIR_RESOURCE_TYPE, ) @pytest.mark.parametrize( - "kwargs,error_msg", - [ - ( - { - "fhir_resource_type": "foo" - }, "BadParameter" - ) - ] + "kwargs,error_msg", [({"fhir_resource_type": "foo"}, "BadParameter")] ) def test_generate_global_ids_errors(kwargs, error_msg): """ @@ -55,20 +49,20 @@ def test_generate_global_ids(tmp_path): assert c in df.columns assert df["fhirResourceType"].eq(DEFAULT_FHIR_RESOURCE_TYPE).all() - assert df["descriptor"].apply( - lambda d: int(d.split("-")[-1]) - ).ge(250000).all() + assert ( + df["descriptor"].apply(lambda d: int(d.split("-")[-1])).ge(250000).all() + ) # Without global IDs filepath = 
_generate_global_id_file( - output_dir=temp_dir, - with_global_ids=False + output_dir=temp_dir, with_global_ids=False ) df = pandas.read_csv(filepath) assert "globalId" not in df.columns - assert df["descriptor"].apply( - lambda d: int(d.split("-")[-1]) - ).ge(0).all() - assert df["descriptor"].apply( - lambda d: int(d.split("-")[-1]) - ).le(9000000000).all() + assert df["descriptor"].apply(lambda d: int(d.split("-")[-1])).ge(0).all() + assert ( + df["descriptor"] + .apply(lambda d: int(d.split("-")[-1])) + .le(9000000000) + .all() + ) From d9cd44582524b9d1c5bff4d2c75517beb10bfe4e Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Fri, 14 Feb 2025 13:51:37 -0500 Subject: [PATCH 23/23] :recycle: Simplify if statement --- d3b_api_client_cli/dewrangle/global_id.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/d3b_api_client_cli/dewrangle/global_id.py b/d3b_api_client_cli/dewrangle/global_id.py index a85696c..feed126 100644 --- a/d3b_api_client_cli/dewrangle/global_id.py +++ b/d3b_api_client_cli/dewrangle/global_id.py @@ -65,10 +65,7 @@ def upsert_and_download_global_descriptor( output_dir = os.path.join(ROOT_DATA_DIR) os.makedirs(output_dir, exist_ok=True) - if study_global_id: - s_id = study_global_id - else: - s_id = dewrangle_study_id + s_id = study_global_id if (study_global_id) else dewrangle_study_id filepath = os.path.join(output_dir, f"global-descriptors-{s_id}.csv")