Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 4 additions & 15 deletions synapseclient/extensions/curator/file_based_metadata_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from synapseclient import Synapse # type: ignore
from synapseclient import Wiki # type: ignore
from synapseclient.core.exceptions import SynapseHTTPError # type: ignore
from synapseclient.extensions.curator.utils import project_id_from_entity_id
from synapseclient.models import ( # type: ignore
Column,
ColumnType,
Expand Down Expand Up @@ -430,27 +431,15 @@ def create_file_based_metadata_task(
synapse_client.logger.info(
"Attempting to get the Synapse ID of the provided folders project."
)
try:
entity = Folder(folder_id).get(synapse_client=synapse_client)
parent = synapse_client.get(entity.parent_id)
project = None
while not project:
if parent.concreteType == "org.sagebionetworks.repo.model.Project":
project = parent
break
parent = synapse_client.get(parent.parentId)
except Exception as e:
synapse_client.logger.exception(
"Error getting the Synapse ID of the provided folders project"
)
raise e

project_id = project_id_from_entity_id(folder_id, synapse_client=synapse_client)
synapse_client.logger.info("Got the Synapse ID of the provided folders project.")

synapse_client.logger.info("Attempting to create the CurationTask.")
try:
task = CurationTask(
data_type=task_datatype,
project_id=project.id,
project_id=project_id,
instructions=instructions,
assignee_principal_id=(
str(assignee_principal_id)
Expand Down
22 changes: 16 additions & 6 deletions synapseclient/extensions/curator/record_based_metadata_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from synapseclient import Synapse
from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE
from synapseclient.core.utils import test_import_pandas
from synapseclient.extensions.curator.utils import project_id_from_entity_id
from synapseclient.models import (
CurationTask,
Grid,
Expand Down Expand Up @@ -99,7 +100,6 @@ def extract_schema_properties_from_web(


def create_record_based_metadata_task(
project_id: str,
folder_id: str,
record_set_name: str,
record_set_description: str,
Expand All @@ -112,6 +112,7 @@ def create_record_based_metadata_task(
assignee_principal_id: Optional[Union[str, int]] = None,
*,
synapse_client: Optional[Synapse] = None,
project_id: Optional[str] = None, # Deprecated, will be removed in v5.0.0
) -> Tuple[RecordSet, CurationTask, Grid]:
"""
Generate and upload CSV templates as a RecordSet for record-based metadata,
Expand Down Expand Up @@ -142,7 +143,6 @@ def create_record_based_metadata_task(

record_set, task, grid = create_record_based_metadata_task(
synapse_client=syn,
project_id="syn12345678",
folder_id="syn87654321",
record_set_name="BiospecimenMetadata_RecordSet",
record_set_description="RecordSet for biospecimen metadata curation",
Expand All @@ -155,9 +155,10 @@ def create_record_based_metadata_task(
```

Arguments:
project_id: The Synapse ID of the project where the folder exists.
folder_id: The Synapse ID of the folder to upload RecordSet to.
record_set_name: Name for the RecordSet.
record_set_name: Name for the RecordSet entity that will be created.
A RecordSet entity captures record-based metadata as a special type of CSV and stores contributor
provided metadata collected via Curator enabling sharing and download of validated metadata in Synapse.
record_set_description: Description for the RecordSet.
curation_task_name: Name for the CurationTask (used as data_type field).
Must be unique within the project, otherwise if it matches an existing
Expand All @@ -177,6 +178,7 @@ def create_record_based_metadata_task(
synapse_client: If not passed in and caching was not disabled by
`Synapse.allow_client_caching(False)` this will use the last created
instance from the Synapse class constructor.
project_id: Deprecated, will be removed in v5.0.0

Returns:
Tuple containing the created RecordSet, CurationTask, and Grid objects
Expand All @@ -186,8 +188,6 @@ def create_record_based_metadata_task(
SynapseError: If there are issues with Synapse operations.
"""
# Validate required parameters
if not project_id:
raise ValueError("project_id is required")
if not folder_id:
raise ValueError("folder_id is required")
if not record_set_name:
Expand All @@ -203,8 +203,18 @@ def create_record_based_metadata_task(
if not schema_uri:
raise ValueError("schema_uri is required")

# Resolve the client before using it: `synapse_client` defaults to None, so
# logging through it prior to resolution would raise AttributeError whenever
# a caller passes the deprecated `project_id` without an explicit client.
synapse_client = Synapse.get_client(synapse_client=synapse_client)

if project_id:
    synapse_client.logger.warning(
        "The 'project_id' parameter is deprecated and will be removed in v5.0.0. "
        "The project ID will be inferred from the folder ID provided."
    )

project_id = project_id_from_entity_id(
entity_id=folder_id, synapse_client=synapse_client
)

template_df = extract_schema_properties_from_web(
syn=synapse_client, schema_uri=schema_uri
)
Expand Down
32 changes: 32 additions & 0 deletions synapseclient/extensions/curator/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from synapseclient import Synapse
from synapseclient.models import Project
from synapseclient.operations import get

# Safeguard against infinite loops when traversing the folder hierarchy to
# find the project ID: the maximum number of parent lookups allowed.
MAX_HIERARCHY_DEPTH = 1000


def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str:
    """
    Retrieves the project ID for a given entity ID by traversing up the
    entity hierarchy, one parent at a time, until a Project is reached.

    Arguments:
        entity_id: The Synapse ID of the entity (e.g., folder, file) to start from.
            If it already refers to a project, that project's ID is returned.
        synapse_client: Authenticated Synapse client instance.

    Returns:
        The Synapse ID of the project that the entity belongs to.

    Raises:
        ValueError: If no project is found within `MAX_HIERARCHY_DEPTH` parent
            lookups, or if an entity with no parent is reached before a
            project is found.
    """
    current_obj = get(entity_id, synapse_client=synapse_client)
    lookups = 0
    while not isinstance(current_obj, Project):
        # Check the budget *before* fetching so no request is wasted on a
        # parent whose result would be discarded anyway.
        if lookups >= MAX_HIERARCHY_DEPTH:
            raise ValueError("Could not find project ID in folder hierarchy")

        parent_id = getattr(current_obj, "parent_id", None)
        if not parent_id:
            # Without a parent there is nowhere left to climb; fail with a
            # clear message instead of passing None to `get`.
            raise ValueError(
                f"Could not find project ID in folder hierarchy: entity "
                f"{getattr(current_obj, 'id', None)!r} has no parent"
            )

        current_obj = get(parent_id, synapse_client=synapse_client)
        lookups += 1
    return current_obj.id
7 changes: 6 additions & 1 deletion synapseclient/models/recordset.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,12 @@ def get_detailed_validation_results(
@dataclass()
@async_to_sync
class RecordSet(RecordSetSynchronousProtocol, AccessControllable, BaseJSONSchema):
"""A RecordSet within Synapse.
"""
A RecordSet entity captures record-based metadata as a special type of CSV.
The record set content can be curated using the grid services.
When a grid is created from a record set, its data can be exported back to a new version of the record set.
The export will include the validation summary as well as a validation file handle that
contains detailed validation results for each row in the record set.
Attributes:
id: The unique immutable ID for this file. A new ID will be generated for new
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

from synapseclient import Synapse
from synapseclient.extensions.curator import bind_jsonschema, register_jsonschema
from synapseclient.models import File, Folder, Project, SchemaOrganization
from synapseclient.extensions.curator.record_based_metadata_task import (
project_id_from_entity_id,
)
from synapseclient.models import Folder, Project, SchemaOrganization


def create_test_name():
Expand Down Expand Up @@ -259,3 +262,37 @@ def test_complete_workflow(
# Cleanup: unbind schema before deleting folder
folder.unbind_schema(synapse_client=syn)
syn.delete(folder.id)


class TestProjectIDFromEntityID:
    """Integration tests for resolving the owning project of an entity ID."""

    @pytest.fixture(scope="module")
    def temp_hierarchy(self, syn: Synapse, request) -> tuple[str, str, str]:
        """Store a Project -> Folder -> Folder chain and return its IDs.

        The IDs are returned as (project, outer folder, nested folder). A
        finalizer that deletes the project is registered once all three
        entities have been stored.
        """
        root_project = Project(name=create_test_name()).store(synapse_client=syn)
        outer_folder = Folder(
            name=create_test_name(), parent_id=root_project.id
        ).store(synapse_client=syn)
        nested_folder = Folder(
            name=create_test_name(), parent_id=outer_folder.id
        ).store(synapse_client=syn)

        request.addfinalizer(lambda: root_project.delete(synapse_client=syn))
        return root_project.id, outer_folder.id, nested_folder.id

    def test_project_id_from_folder(self, syn, temp_hierarchy):
        """A nested folder's ID resolves to the ID of its project."""
        expected_project_id, _, nested_folder_id = temp_hierarchy

        assert project_id_from_entity_id(nested_folder_id, syn) == expected_project_id

    def test_project_id_from_project(self, syn, temp_hierarchy):
        """A project's own ID resolves to itself."""
        project_id = temp_hierarchy[0]

        assert project_id_from_entity_id(project_id, syn) == project_id
Loading
Loading