From 4568357ac443f20a1c3bdd5436f80f89b74ef345 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Feb 2026 13:33:11 -0800 Subject: [PATCH 01/11] updated description --- synapseclient/models/recordset.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapseclient/models/recordset.py b/synapseclient/models/recordset.py index 87bb877ef..854b5cd43 100644 --- a/synapseclient/models/recordset.py +++ b/synapseclient/models/recordset.py @@ -360,7 +360,12 @@ def get_detailed_validation_results( @dataclass() @async_to_sync class RecordSet(RecordSetSynchronousProtocol, AccessControllable, BaseJSONSchema): - """A RecordSet within Synapse. + """ + A RecordSet entity captures record-based metadata as a special type of CSV. + The record set content can be curated using the grid services. + When a grid is created from a record set, its data can be exported back to a new version of the record set. + The export will include the validation summary as well as a validation file handle that + contains detailed validation results for each row in the record set. Attributes: id: The unique immutable ID for this file. A new ID will be generated for new From 8727204b382277a1006de5080cf1677b25da2007 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Feb 2026 13:33:59 -0800 Subject: [PATCH 02/11] deprecated project_id parameter --- .../curator/record_based_metadata_task.py | 43 ++++++++-- .../test_record_based_metadata_task.py | 41 +++++++++ .../extensions/unit_test_curator.py | 86 ++++++++++++------- 3 files changed, 132 insertions(+), 38 deletions(-) create mode 100644 tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index 7274ffdde..cb3c8d54c 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -15,9 +15,11 @@ CurationTask, Grid, JSONSchema, + Project, RecordBasedMetadataTaskProperties, RecordSet, ) +from synapseclient.operations import get def extract_property_titles(schema_data: Dict[str, Any]) -> List[str]: @@ -99,7 +101,6 @@ def extract_schema_properties_from_web( def create_record_based_metadata_task( - project_id: str, folder_id: str, record_set_name: str, record_set_description: str, @@ -112,6 +113,7 @@ def create_record_based_metadata_task( assignee_principal_id: Optional[Union[str, int]] = None, *, synapse_client: Optional[Synapse] = None, + project_id: Optional[str] = None, # Deprecated, will be removed in v5.0.0 ) -> Tuple[RecordSet, CurationTask, Grid]: """ Generate and upload CSV templates as a RecordSet for record-based metadata, @@ -142,7 +144,6 @@ def create_record_based_metadata_task( record_set, task, grid = create_record_based_metadata_task( synapse_client=syn, - project_id="syn12345678", folder_id="syn87654321", record_set_name="BiospecimenMetadata_RecordSet", record_set_description="RecordSet for biospecimen metadata curation", @@ -155,9 +156,10 @@ def create_record_based_metadata_task( ``` Arguments: - project_id: The Synapse ID of the project where the folder exists. folder_id: The Synapse ID of the folder to upload RecordSet to. - record_set_name: Name for the RecordSet. + record_set_name: Name for the RecordSet entity that will be created. + A RecordSet entity captures record-based metadata as a special type of CSV and stores contributor + provided metadata collected via Curator enabling sharing and download of validated metadata in Synapse. record_set_description: Description for the RecordSet. curation_task_name: Name for the CurationTask (used as data_type field). Must be unique within the project, otherwise if it matches an existing @@ -177,6 +179,7 @@ def create_record_based_metadata_task( synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + project_id: Deprecated, will be removed in v5.0.0 Returns: Tuple containing the created RecordSet, CurationTask, and Grid objects @@ -186,8 +189,6 @@ def create_record_based_metadata_task( SynapseError: If there are issues with Synapse operations. """ # Validate required parameters - if not project_id: - raise ValueError("project_id is required") if not folder_id: raise ValueError("folder_id is required") if not record_set_name: @@ -203,8 +204,18 @@ def create_record_based_metadata_task( if not schema_uri: raise ValueError("schema_uri is required") + if project_id: + synapse_client.logger.warning( + "The 'project_id' parameter is deprecated and will be removed in v5.0.0. " + "The project ID will be inferred from the folder ID provided." + ) + synapse_client = Synapse.get_client(synapse_client=synapse_client) + project_id = project_id_from_entity_id( + entity_id=folder_id, synapse_client=synapse_client + ) + template_df = extract_schema_properties_from_web( syn=synapse_client, schema_uri=schema_uri ) @@ -283,3 +294,23 @@ def create_record_based_metadata_task( raise e return record_set_with_data, curation_task, curation_grid + + +def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: + """ + Helper function to retrieve the project ID from a given entity ID by traversing up the hierarchy. + + Args: + entity_id: The Synapse ID of the entity (e.g., folder, file) to start from. + synapse_client: Authenticated Synapse client instance + """ + + # Get the project ID from the folder ID + current_obj = get(entity_id, synapse_client=synapse_client) + iter = 0 + while not isinstance(current_obj, Project): + current_obj = get(current_obj.parent_id, synapse_client=synapse_client) + iter += 1 + if iter > 1000: + raise ValueError("Could not find project ID in folder hierarchy") + return current_obj.id diff --git a/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py new file mode 100644 index 000000000..7127964bd --- /dev/null +++ b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py @@ -0,0 +1,41 @@ +import pytest + +from synapseclient import Synapse +from synapseclient.extensions.curator.record_based_metadata_task import ( + project_id_from_entity_id, +) +from synapseclient.models import Folder, Project + + +class TestProjectIDFromEntityID: + @pytest.fixture(scope="module") + def temp_hierarchy(self, syn: Synapse, request) -> tuple[str, str, str]: + """Creates a Project -> Folder -> Folder hierarchy for testing.""" + project = Project(name="IntegrationTest_Root_Project").store(synapse_client=syn) + folder1 = Folder(name="TestFolder1", parent_id=project.id).store( + synapse_client=syn + ) + folder2 = Folder(name="TestFolder2", parent_id=folder1.id).store( + synapse_client=syn + ) + + def delete_project(): + project.delete(synapse_client=syn) + + request.addfinalizer(delete_project) + return project.id, folder1.id, folder2.id + + def test_project_id_from_folder(self, syn, temp_hierarchy): + # Test finding project from a nested file + folder_id = temp_hierarchy[2] + expected_project_id = temp_hierarchy[0] + + result = project_id_from_entity_id(folder_id, syn) + assert result == expected_project_id + + def test_project_id_from_project(self, syn, temp_hierarchy): + # Test finding project from a nested file + project_id = temp_hierarchy[0] + + result = project_id_from_entity_id(project_id, syn) + assert result == project_id diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index ebaef1e88..c22a5fe6c 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -39,6 +39,7 @@ extract_property_titles, extract_schema_properties_from_dict, extract_schema_properties_from_web, + project_id_from_entity_id, ) from synapseclient.extensions.curator.schema_generation import ( generate_jsonld, @@ -54,6 +55,7 @@ FileBasedMetadataTaskProperties, RecordBasedMetadataTaskProperties, ) +from synapseclient.models.folder import Folder from synapseclient.models.mixins import JSONSchemaBinding from synapseclient.models.mixins.json_schema import JSONSchemaVersionInfo @@ -535,6 +537,19 @@ def setUp(self): self.instructions = "Test instructions" self.schema_uri = "sage.schemas.v2571-amp.Biospecimen.schema-0.0.1" + @patch("synapseclient.extensions.curator.record_based_metadata_task.get") + def test_project_id_from_entity_id_hierarchy_limit_exceeded(self, mock_get): + """Tests the 'iter > 1000' safety break.""" + mock_folder = Folder(id="syn123", parent_id="syn456") + mock_get.return_value = mock_folder + with pytest.raises( + ValueError, match="Could not find project ID in folder hierarchy" + ): + project_id_from_entity_id(entity_id="syn123", synapse_client=self.mock_syn) + + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -557,11 +572,14 @@ def test_create_record_based_metadata_task_success( mock_temp_file, mock_extract_schema, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test successful creation of record-based metadata task.""" # GIVEN a record-based metadata task with all required components mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id + mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) mock_extract_schema.return_value = mock_df @@ -588,7 +606,6 @@ def test_create_record_based_metadata_task_success( # WHEN I create the record-based metadata task result = create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -618,6 +635,9 @@ def test_create_record_based_metadata_task_success( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -640,10 +660,12 @@ def test_create_record_based_metadata_task_no_schema_binding( mock_temp_file, mock_extract_schema, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test creation without schema binding.""" # Setup mocks mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) mock_extract_schema.return_value = mock_df @@ -671,7 +693,6 @@ def test_create_record_based_metadata_task_no_schema_binding( # Call function result = create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -686,28 +707,6 @@ def test_create_record_based_metadata_task_no_schema_binding( # Assertions mock_record_set.bind_schema.assert_not_called() - @patch( - "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" - ) - def test_create_record_based_metadata_task_missing_project_id( - self, mock_get_client - ): - """Test ValueError when project_id is missing.""" - mock_get_client.return_value = self.mock_syn - - with pytest.raises(ValueError, match="project_id is required"): - create_record_based_metadata_task( - project_id="", - folder_id=self.folder_id, - record_set_name=self.record_set_name, - record_set_description=self.record_set_description, - curation_task_name=self.curation_task_name, - upsert_keys=self.upsert_keys, - instructions=self.instructions, - schema_uri=self.schema_uri, - synapse_client=self.mock_syn, - ) - @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -721,7 +720,6 @@ def test_create_record_based_metadata_task_missing_upsert_keys( ValueError, match="upsert_keys is required and must be a non-empty list" ): create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -743,7 +741,6 @@ def test_create_record_based_metadata_task_missing_schema_uri( with pytest.raises(ValueError, match="schema_uri is required"): create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -754,6 +751,9 @@ def test_create_record_based_metadata_task_missing_schema_uri( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -765,10 +765,16 @@ def test_create_record_based_metadata_task_missing_schema_uri( ) @patch("builtins.open") def test_create_record_based_metadata_task_csv_write_error( - self, mock_open, mock_temp_file, mock_extract_schema, mock_get_client + self, + mock_open, + mock_temp_file, + mock_extract_schema, + mock_get_client, + mock_get_project_id_from_entity_id, ): """Test error handling during CSV file writing.""" mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) mock_extract_schema.return_value = mock_df @@ -781,7 +787,6 @@ def test_create_record_based_metadata_task_csv_write_error( with pytest.raises(Exception, match="File write error"): create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -792,6 +797,9 @@ def test_create_record_based_metadata_task_csv_write_error( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -810,9 +818,11 @@ def test_create_record_based_metadata_task_record_set_creation_error( mock_temp_file, mock_extract_schema, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test error handling during RecordSet creation.""" mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) mock_extract_schema.return_value = mock_df @@ -829,7 +839,6 @@ def test_create_record_based_metadata_task_record_set_creation_error( with pytest.raises(Exception, match="RecordSet creation failed"): create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -840,6 +849,9 @@ def test_create_record_based_metadata_task_record_set_creation_error( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -860,9 +872,11 @@ def test_create_record_based_metadata_task_curation_task_creation_error( mock_temp_file, mock_extract_schema, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test error handling during CurationTask creation.""" mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) mock_extract_schema.return_value = mock_df @@ -883,7 +897,6 @@ def test_create_record_based_metadata_task_curation_task_creation_error( with pytest.raises(Exception, match="CurationTask creation failed"): create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -895,6 +908,9 @@ def test_create_record_based_metadata_task_curation_task_creation_error( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -917,9 +933,11 @@ def test_create_record_based_metadata_task_grid_creation_error( mock_temp_file, mock_extract_schema, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test error handling during Grid creation.""" mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) mock_extract_schema.return_value = mock_df @@ -946,7 +964,6 @@ def test_create_record_based_metadata_task_grid_creation_error( with pytest.raises(Exception, match="Grid creation failed"): create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, @@ -958,6 +975,9 @@ def test_create_record_based_metadata_task_grid_creation_error( synapse_client=self.mock_syn, ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" ) @@ -980,6 +1000,7 @@ def test_create_record_based_metadata_task_with_assignee( mock_temp_file, mock_extract_schema, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test successful creation of record-based metadata task with assignee_principal_id.""" # Test both string and int inputs - int should be converted to string @@ -991,6 +1012,7 @@ def test_create_record_based_metadata_task_with_assignee( for input_assignee, expected_assignee in test_cases: with self.subTest(input_assignee=input_assignee): # Reset mocks for each subtest + mock_get_project_id_from_entity_id.reset_mock() mock_open.reset_mock() mock_grid_cls.reset_mock() mock_curation_task_cls.reset_mock() @@ -1000,6 +1022,7 @@ def test_create_record_based_metadata_task_with_assignee( mock_get_client.reset_mock() # GIVEN a record-based metadata task with assignee_principal_id + mock_get_project_id_from_entity_id.return_value = self.project_id mock_get_client.return_value = self.mock_syn mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) @@ -1028,7 +1051,6 @@ def test_create_record_based_metadata_task_with_assignee( # WHEN I create the record-based metadata task with assignee_principal_id result = create_record_based_metadata_task( - project_id=self.project_id, folder_id=self.folder_id, record_set_name=self.record_set_name, record_set_description=self.record_set_description, From 57d7d438931daefe39758616547c1a26cd54ca81 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 24 Feb 2026 13:38:18 -0800 Subject: [PATCH 03/11] improve docstring --- .../extensions/curator/record_based_metadata_task.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index cb3c8d54c..aa67a7445 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -298,11 +298,17 @@ def create_record_based_metadata_task( def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: """ - Helper function to retrieve the project ID from a given entity ID by traversing up the hierarchy. + Retrieves the project ID from a given entity ID by traversing up the the folder hierarchy Args: entity_id: The Synapse ID of the entity (e.g., folder, file) to start from. synapse_client: Authenticated Synapse client instance + + Returns: + The Synapse ID of the project that the entity belongs to. + + Raises: + ValueError: If the project ID cannot be found within 1000 iterations. """ # Get the project ID from the folder ID From 375236508e96c2f36215153e5f72ce9110fcefca Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 25 Feb 2026 08:23:35 -0800 Subject: [PATCH 04/11] refactored file_based_task_creation to use project id function --- .../curator/file_based_metadata_task.py | 21 ++---- .../extensions/unit_test_curator.py | 68 +++++-------------- 2 files changed, 22 insertions(+), 67 deletions(-) diff --git a/synapseclient/extensions/curator/file_based_metadata_task.py b/synapseclient/extensions/curator/file_based_metadata_task.py index 146409ab1..367cc994f 100644 --- a/synapseclient/extensions/curator/file_based_metadata_task.py +++ b/synapseclient/extensions/curator/file_based_metadata_task.py @@ -430,27 +430,18 @@ def create_file_based_metadata_task( synapse_client.logger.info( "Attempting to get the Synapse ID of the provided folders project." ) - try: - entity = Folder(folder_id).get(synapse_client=synapse_client) - parent = synapse_client.get(entity.parent_id) - project = None - while not project: - if parent.concreteType == "org.sagebionetworks.repo.model.Project": - project = parent - break - parent = synapse_client.get(parent.parentId) - except Exception as e: - synapse_client.logger.exception( - "Error getting the Synapse ID of the provided folders project" - ) - raise e + from synapseclient.extensions.curator.record_based_metadata_task import ( + project_id_from_entity_id, + ) + + project_id = project_id_from_entity_id(folder_id, synapse_client=synapse_client) synapse_client.logger.info("Got the Synapse ID of the provided folders project.") synapse_client.logger.info("Attempting to create the CurationTask.") try: task = CurationTask( data_type=task_datatype, - project_id=project.id, + project_id=project_id, instructions=instructions, assignee_principal_id=( str(assignee_principal_id) diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index c22a5fe6c..661eac845 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -72,11 +72,15 @@ def setUp(self): self.mock_syn = Mock(spec=Synapse) self.mock_syn.logger = Mock() self.folder_id = "syn12345678" + self.project_id = "syn11111111" self.curation_task_name = "TestCurationTask" self.instructions = "Test instructions" self.entity_view_name = "Test Entity View" self.schema_uri = "sage.schemas.v2571-amp.Biospecimen.schema-0.0.1" + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" ) @@ -97,11 +101,13 @@ def test_create_file_based_metadata_task_success_with_schema( mock_create_wiki, mock_create_entity_view, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test successful creation with schema binding.""" # GIVEN a file-based metadata task with schema binding mock_get_client.return_value = self.mock_syn mock_create_entity_view.return_value = "syn87654321" + mock_get_project_id_from_entity_id.return_value = self.project_id mock_folder = Mock() mock_folder_cls.return_value = mock_folder @@ -148,6 +154,9 @@ def test_create_file_based_metadata_task_success_with_schema( ) mock_get.assert_called_once_with(self.folder_id, synapse_client=self.mock_syn) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) @patch( "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" ) @@ -162,11 +171,13 @@ def test_create_file_based_metadata_task_success_without_schema_no_wiki( mock_folder_cls, mock_create_entity_view, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test successful creation without schema binding and without wiki.""" # GIVEN a file-based metadata task without schema binding or wiki mock_get_client.return_value = self.mock_syn mock_create_entity_view.return_value = "syn87654321" + mock_get_project_id_from_entity_id.return_value = self.project_id mock_folder = Mock() mock_folder_cls.return_value = mock_folder @@ -382,58 +393,8 @@ def test_create_file_based_metadata_task_schema_retrieval_error( ) @patch( - "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" ) - @patch( - "synapseclient.extensions.curator.file_based_metadata_task.create_json_schema_entity_view" - ) - @patch("synapseclient.extensions.curator.file_based_metadata_task.Folder") - def test_create_file_based_metadata_task_project_traversal( - self, mock_folder_cls, mock_create_entity_view, mock_get_client - ): - """Test project traversal to find parent project.""" - mock_get_client.return_value = self.mock_syn - mock_create_entity_view.return_value = "syn87654321" - - mock_folder = Mock() - mock_folder_cls.return_value = mock_folder - mock_folder.get.return_value = mock_folder - mock_folder.parent_id = "syn11111111" - - # Mock parent folder - mock_parent_folder = Mock() - mock_parent_folder.concreteType = "org.sagebionetworks.repo.model.Folder" - mock_parent_folder.parentId = "syn22222222" - - # Mock project - mock_project = Mock() - mock_project.concreteType = "org.sagebionetworks.repo.model.Project" - mock_project.id = "syn22222222" - - self.mock_syn.get.side_effect = [mock_parent_folder, mock_project] - - mock_task = Mock() - mock_task.task_id = "task123" - - with patch( - "synapseclient.extensions.curator.file_based_metadata_task.CurationTask" - ) as mock_curation_task_cls: - mock_curation_task = Mock() - mock_curation_task.store.return_value = mock_task - mock_curation_task_cls.return_value = mock_curation_task - - result = create_file_based_metadata_task( - folder_id=self.folder_id, - curation_task_name=self.curation_task_name, - instructions=self.instructions, - attach_wiki=False, - synapse_client=self.mock_syn, - ) - - self.assertEqual(result, ("syn87654321", "task123")) - # Verify that syn.get was called twice (for parent folder and project) - self.assertEqual(self.mock_syn.get.call_count, 2) - @patch( "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" ) @@ -448,6 +409,7 @@ def test_create_file_based_metadata_task_with_assignee( mock_folder_cls, mock_create_entity_view, mock_get_client, + mock_get_project_id_from_entity_id, ): """Test successful creation of file-based metadata task with assignee_principal_id.""" # Test both string and int inputs - int should be converted to string @@ -463,10 +425,12 @@ def test_create_file_based_metadata_task_with_assignee( mock_folder_cls.reset_mock() mock_create_entity_view.reset_mock() mock_get_client.reset_mock() + mock_get_project_id_from_entity_id.reset_mock() # GIVEN a file-based metadata task with assignee_principal_id mock_get_client.return_value = self.mock_syn mock_create_entity_view.return_value = "test_entity_view_id" + mock_get_project_id_from_entity_id.return_value = self.project_id mock_folder = Mock() mock_folder_cls.return_value = mock_folder @@ -500,7 +464,7 @@ def test_create_file_based_metadata_task_with_assignee( # THEN the CurationTask should be called with assignee_principal_id as string mock_curation_task_cls.assert_called_once_with( data_type=self.curation_task_name, - project_id="syn22222222", + project_id=self.project_id, instructions=self.instructions, assignee_principal_id=expected_assignee, task_properties=FileBasedMetadataTaskProperties( From 93aa593602d783fbf9a0935d2a26b958e9033dd7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 09:51:52 -0800 Subject: [PATCH 05/11] fix typo --- synapseclient/extensions/curator/record_based_metadata_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index aa67a7445..7e7f12b15 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -179,7 +179,7 @@ def create_record_based_metadata_task( synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. - project_id: Deprecated, will be removed in v5.0.0 + project_id: Deprecated, will be removed in v5.0.0 Returns: Tuple containing the created RecordSet, CurationTask, and Grid objects From 43ad871c01e979437e929bbe928852423cfdb7dc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 09:52:43 -0800 Subject: [PATCH 06/11] fix typo --- synapseclient/extensions/curator/record_based_metadata_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index 7e7f12b15..c53831fd7 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -298,7 +298,7 @@ def create_record_based_metadata_task( def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: """ - Retrieves the project ID from a given entity ID by traversing up the the folder hierarchy + Retrieves the project ID from a given entity ID by traversing up the folder hierarchy Args: entity_id: The Synapse ID of the entity (e.g., folder, file) to start from. From 4c992b0fbb127a9c7eb841ed7d33b887af5aab60 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 09:54:09 -0800 Subject: [PATCH 07/11] rename iter --- .../extensions/curator/record_based_metadata_task.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index c53831fd7..d4bc04225 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -313,10 +313,10 @@ def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: # Get the project ID from the folder ID current_obj = get(entity_id, synapse_client=synapse_client) - iter = 0 + iterations = 0 while not isinstance(current_obj, Project): current_obj = get(current_obj.parent_id, synapse_client=synapse_client) - iter += 1 - if iter > 1000: + iterations += 1 + if iterations > 1000: raise ValueError("Could not find project ID in folder hierarchy") return current_obj.id From 677cdb5844ff31508d732dd2b1e4254a50f4f6ff Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 09:59:27 -0800 Subject: [PATCH 08/11] added docstrings --- .../curator/test_record_based_metadata_task.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py index 7127964bd..cceb8c867 100644 --- a/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py +++ b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py @@ -1,3 +1,5 @@ +import uuid + import pytest from synapseclient import Synapse @@ -11,7 +13,8 @@ class TestProjectIDFromEntityID: @pytest.fixture(scope="module") def temp_hierarchy(self, syn: Synapse, request) -> tuple[str, str, str]: """Creates a Project -> Folder -> Folder hierarchy for testing.""" - project = Project(name="IntegrationTest_Root_Project").store(synapse_client=syn) + project_name = str(uuid.uuid4()) + project = Project(name=project_name).store(synapse_client=syn) folder1 = Folder(name="TestFolder1", parent_id=project.id).store( synapse_client=syn ) @@ -26,7 +29,7 @@ def delete_project(): return project.id, folder1.id, folder2.id def test_project_id_from_folder(self, syn, temp_hierarchy): - # Test finding project from a nested file + """Test finding project id when input id is from a nested folder.""" folder_id = temp_hierarchy[2] expected_project_id = temp_hierarchy[0] @@ -34,7 +37,7 @@ def test_project_id_from_folder(self, syn, temp_hierarchy): assert result == expected_project_id def test_project_id_from_project(self, syn, temp_hierarchy): - # Test finding project from a nested file + """Test finding project id when input id is for a project""" project_id = temp_hierarchy[0] result = project_id_from_entity_id(project_id, syn) From 7b95bcb74cc46ac0c5e25813b6907f674ae84577 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 11:23:29 -0800 Subject: [PATCH 09/11] moved util into a utils file --- .../curator/file_based_metadata_task.py | 4 +- .../curator/record_based_metadata_task.py | 29 +----------- synapseclient/extensions/curator/utils.py | 29 ++++++++++++ .../test_record_based_metadata_task.py | 44 ------------------- .../curator}/test_schema_management.py | 39 +++++++++++++++- 5 files changed, 69 insertions(+), 76 deletions(-) create mode 100644 synapseclient/extensions/curator/utils.py delete mode 100644 tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py rename tests/integration/synapseclient/{ => extensions/curator}/test_schema_management.py (84%) diff --git a/synapseclient/extensions/curator/file_based_metadata_task.py b/synapseclient/extensions/curator/file_based_metadata_task.py index 367cc994f..98b09c6c3 100644 --- a/synapseclient/extensions/curator/file_based_metadata_task.py +++ b/synapseclient/extensions/curator/file_based_metadata_task.py @@ -10,6 +10,7 @@ from synapseclient import Synapse # type: ignore from synapseclient import Wiki # type: ignore from synapseclient.core.exceptions import SynapseHTTPError # type: ignore +from synapseclient.extensions.curator.utils import project_id_from_entity_id from synapseclient.models import ( # type: ignore Column, ColumnType, @@ -430,9 +431,6 @@ def create_file_based_metadata_task( synapse_client.logger.info( "Attempting to get the Synapse ID of the provided folders project." ) - from synapseclient.extensions.curator.record_based_metadata_task import ( - project_id_from_entity_id, - ) project_id = project_id_from_entity_id(folder_id, synapse_client=synapse_client) synapse_client.logger.info("Got the Synapse ID of the provided folders project.") diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index d4bc04225..b56ab50c7 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -11,15 +11,14 @@ from synapseclient import Synapse from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE from synapseclient.core.utils import test_import_pandas +from synapseclient.extensions.curator.utils import project_id_from_entity_id from synapseclient.models import ( CurationTask, Grid, JSONSchema, - Project, RecordBasedMetadataTaskProperties, RecordSet, ) -from synapseclient.operations import get def extract_property_titles(schema_data: Dict[str, Any]) -> List[str]: @@ -294,29 +293,3 @@ def create_record_based_metadata_task( raise e return record_set_with_data, curation_task, curation_grid - - -def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: - """ - Retrieves the project ID from a given entity ID by traversing up the folder hierarchy - - Args: - entity_id: The Synapse ID of the entity (e.g., folder, file) to start from. - synapse_client: Authenticated Synapse client instance - - Returns: - The Synapse ID of the project that the entity belongs to. - - Raises: - ValueError: If the project ID cannot be found within 1000 iterations. - """ - - # Get the project ID from the folder ID - current_obj = get(entity_id, synapse_client=synapse_client) - iterations = 0 - while not isinstance(current_obj, Project): - current_obj = get(current_obj.parent_id, synapse_client=synapse_client) - iterations += 1 - if iterations > 1000: - raise ValueError("Could not find project ID in folder hierarchy") - return current_obj.id diff --git a/synapseclient/extensions/curator/utils.py b/synapseclient/extensions/curator/utils.py new file mode 100644 index 000000000..841ad2147 --- /dev/null +++ b/synapseclient/extensions/curator/utils.py @@ -0,0 +1,29 @@ +from synapseclient import Synapse +from synapseclient.models import Project +from synapseclient.operations import get + + +def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: + """ + Retrieves the project ID from a given entity ID by traversing up the folder hierarchy + + Args: + entity_id: The Synapse ID of the entity (e.g., folder, file) to start from. + synapse_client: Authenticated Synapse client instance + + Returns: + The Synapse ID of the project that the entity belongs to. + + Raises: + ValueError: If the project ID cannot be found within 1000 iterations. + """ + + # Get the project ID from the folder ID + current_obj = get(entity_id, synapse_client=synapse_client) + iterations = 0 + while not isinstance(current_obj, Project): + current_obj = get(current_obj.parent_id, synapse_client=synapse_client) + iterations += 1 + if iterations > 1000: + raise ValueError("Could not find project ID in folder hierarchy") + return current_obj.id diff --git a/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py deleted file mode 100644 index cceb8c867..000000000 --- a/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py +++ /dev/null @@ -1,44 +0,0 @@ -import uuid - -import pytest - -from synapseclient import Synapse -from synapseclient.extensions.curator.record_based_metadata_task import ( - project_id_from_entity_id, -) -from synapseclient.models import Folder, Project - - -class TestProjectIDFromEntityID: - @pytest.fixture(scope="module") - def temp_hierarchy(self, syn: Synapse, request) -> tuple[str, str, str]: - """Creates a Project -> Folder -> Folder hierarchy for testing.""" - project_name = str(uuid.uuid4()) - project = Project(name=project_name).store(synapse_client=syn) - folder1 = Folder(name="TestFolder1", parent_id=project.id).store( - synapse_client=syn - ) - folder2 = Folder(name="TestFolder2", parent_id=folder1.id).store( - synapse_client=syn - ) - - def delete_project(): - project.delete(synapse_client=syn) - - request.addfinalizer(delete_project) - return project.id, folder1.id, folder2.id - - def test_project_id_from_folder(self, syn, temp_hierarchy): - """Test finding project id when input id is from a nested folder.""" - folder_id = temp_hierarchy[2] - expected_project_id = temp_hierarchy[0] - - result = project_id_from_entity_id(folder_id, syn) - assert result == expected_project_id - - def test_project_id_from_project(self, syn, temp_hierarchy): - """Test finding project id when input id is for a project""" - project_id = temp_hierarchy[0] - - result = project_id_from_entity_id(project_id, syn) - assert result == project_id diff --git a/tests/integration/synapseclient/test_schema_management.py b/tests/integration/synapseclient/extensions/curator/test_schema_management.py similarity index 84% rename from tests/integration/synapseclient/test_schema_management.py rename to tests/integration/synapseclient/extensions/curator/test_schema_management.py index 34a9f7d94..e7b2cf0a1 100644 --- a/tests/integration/synapseclient/test_schema_management.py +++ b/tests/integration/synapseclient/extensions/curator/test_schema_management.py @@ -8,7 +8,10 @@ from synapseclient import Synapse from synapseclient.extensions.curator import bind_jsonschema, register_jsonschema -from synapseclient.models import File, Folder, Project, SchemaOrganization +from synapseclient.extensions.curator.record_based_metadata_task import ( + project_id_from_entity_id, +) +from synapseclient.models import Folder, Project, SchemaOrganization def create_test_name(): @@ -259,3 +262,37 @@ def test_complete_workflow( # Cleanup: unbind schema before deleting folder folder.unbind_schema(synapse_client=syn) syn.delete(folder.id) + + +class TestProjectIDFromEntityID: + @pytest.fixture(scope="module") + def temp_hierarchy(self, syn: Synapse, request) -> tuple[str, str, str]: + """Creates a Project -> Folder -> Folder hierarchy for testing.""" + project = Project(name=create_test_name()).store(synapse_client=syn) + folder1 = Folder(name=create_test_name(), parent_id=project.id).store( + synapse_client=syn + ) + folder2 = Folder(name=create_test_name(), parent_id=folder1.id).store( + synapse_client=syn + ) + + def delete_project(): + project.delete(synapse_client=syn) + + request.addfinalizer(delete_project) + return project.id, folder1.id, folder2.id + + def test_project_id_from_folder(self, syn, temp_hierarchy): + """Test finding project id when input id is from a nested folder.""" + folder_id = temp_hierarchy[2] + expected_project_id = temp_hierarchy[0] + + result = project_id_from_entity_id(folder_id, syn) + assert result == expected_project_id + + def test_project_id_from_project(self, syn, temp_hierarchy): + """Test finding project id when input id is for a project""" + project_id = temp_hierarchy[0] + + result = project_id_from_entity_id(project_id, syn) + assert result == project_id From 44957cd174f7c4af61fab21841128d529e4d53af Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 11:32:17 -0800 Subject: [PATCH 10/11] fix broken patches --- tests/unit/synapseclient/extensions/unit_test_curator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index 661eac845..c735eccd7 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -79,7 +79,7 @@ def setUp(self): self.schema_uri = "sage.schemas.v2571-amp.Biospecimen.schema-0.0.1" @patch( - "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + "synapseclient.extensions.curator.file_based_metadata_task.project_id_from_entity_id" ) @patch( "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" @@ -155,7 +155,7 @@ def test_create_file_based_metadata_task_success_with_schema( mock_get.assert_called_once_with(self.folder_id, synapse_client=self.mock_syn) @patch( - "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + "synapseclient.extensions.curator.file_based_metadata_task.project_id_from_entity_id" ) @patch( "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" @@ -393,7 +393,7 @@ def test_create_file_based_metadata_task_schema_retrieval_error( ) @patch( - "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + "synapseclient.extensions.curator.file_based_metadata_task.project_id_from_entity_id" ) @patch( "synapseclient.extensions.curator.file_based_metadata_task.Synapse.get_client" @@ -501,7 +501,7 @@ def setUp(self): self.instructions = "Test instructions" self.schema_uri = "sage.schemas.v2571-amp.Biospecimen.schema-0.0.1" - @patch("synapseclient.extensions.curator.record_based_metadata_task.get") + @patch("synapseclient.extensions.curator.utils.get") def test_project_id_from_entity_id_hierarchy_limit_exceeded(self, mock_get): """Tests the 'iter > 1000' safety break.""" mock_folder = Folder(id="syn123", parent_id="syn456") From 5b72682028b47816d49318aea78a751a3eed99a5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 26 Feb 2026 12:06:29 -0800 Subject: [PATCH 11/11] crate iteration global --- synapseclient/extensions/curator/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/utils.py b/synapseclient/extensions/curator/utils.py index 841ad2147..b63f9bfac 100644 --- a/synapseclient/extensions/curator/utils.py +++ b/synapseclient/extensions/curator/utils.py @@ -2,6 +2,9 @@ from synapseclient.models import Project from synapseclient.operations import get +"""This number represents a safeguard against infinite loops when traversing the folder hierarchy to find the project ID.""" +MAX_HIERARCHY_DEPTH = 1000 + def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: """ @@ -24,6 +27,6 @@ def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str: while not isinstance(current_obj, Project): current_obj = get(current_obj.parent_id, synapse_client=synapse_client) iterations += 1 - if iterations > 1000: + if iterations > MAX_HIERARCHY_DEPTH: raise ValueError("Could not find project ID in folder hierarchy") return current_obj.id