diff --git a/docs/source/components/identifiers.md b/docs/source/components/identifiers.md index b20bf0b68..b26cc6f62 100644 --- a/docs/source/components/identifiers.md +++ b/docs/source/components/identifiers.md @@ -11,6 +11,7 @@ Code or script identifier | `url` | `str` | Code URL (URL to code repository) | | `name` | `Optional[str]` | Name | | `version` | `Optional[str]` | Code version | +| `commit_hash` | `Optional[str]` | Commit hash (Commit hash of the code.) | | `container` | Optional[[Container](#container)] | Container | | `run_script` | `Optional[pathlib.Path]` | Run script (Path to run script) | | `language` | `Optional[str]` | Programming language (Programming language used) | diff --git a/src/aind_data_schema/components/identifiers.py b/src/aind_data_schema/components/identifiers.py index 5a3b2c3f8..428dc1fb5 100644 --- a/src/aind_data_schema/components/identifiers.py +++ b/src/aind_data_schema/components/identifiers.py @@ -5,8 +5,11 @@ from typing import Dict, List, Optional from aind_data_schema_models.registries import Registry -from pydantic import Field +from pydantic import Field, model_validator +import warnings +from typing import Annotated +from pydantic import StringConstraints from aind_data_schema.base import DataModel, DiscriminatedList, GenericModel @@ -67,12 +70,22 @@ class Container(DataModel): uri: str = Field(..., title="URI", description="URI of the container, e.g. 
Docker Hub URL") +CommitHash = Annotated[ + str, + StringConstraints( + pattern=r"^[0-9a-fA-F]{7,64}$", # 7-char abbreviation up to a full SHA-256 object id (64 hex digits) + strip_whitespace=True, + ), +] + + class Code(DataModel): """Code or script identifier""" url: str = Field(..., title="Code URL", description="URL to code repository") name: Optional[str] = Field(default=None, title="Name") version: Optional[str] = Field(default=None, title="Code version") + commit_hash: Optional[CommitHash] = Field(default=None, title="Commit hash", description="Commit hash of the code.") container: Optional[Container] = Field(default=None, title="Container") run_script: Optional[Path] = Field(default=None, title="Run script", description="Path to run script") @@ -92,3 +105,14 @@ class Code(DataModel): title="Core dependency", description="For code with a core software package dependency, e.g. Bonsai", ) + + @model_validator(mode="after") + def _ensure_commit_hash_or_version(self) -> "Code": + """Warn (without failing validation) when neither commit_hash nor version is provided""" + if not self.commit_hash and not self.version: + warnings.warn( + "Neither commit_hash nor version provided for Code. " + "It's recommended to provide at least one to ensure reproducibility. " + "In the future, we will require at least one of these fields." 
+ ) + return self diff --git a/tests/test_identifiers.py b/tests/test_identifiers.py index 780ae0d4d..155341ab3 100644 --- a/tests/test_identifiers.py +++ b/tests/test_identifiers.py @@ -4,7 +4,7 @@ from pydantic import ValidationError -from aind_data_schema.components.identifiers import Person +from aind_data_schema.components.identifiers import Code, Person class Testexperimenter(unittest.TestCase): @@ -23,5 +23,36 @@ def test_experimenter_missing_fields(self): Person() +class TestGitHash(unittest.TestCase): + """Test CommitHash type validation via Code model""" + + def test_git_hash_valid(self): + """Valid git hashes are accepted and stored correctly""" + cases = [ + ("abc1234", "abc1234"), + ("a" * 40, "a" * 40), # full SHA-1 object id + ("a" * 64, "a" * 64), # full SHA-256 object id + ("aBcDeF1", "aBcDeF1"), + ("deadbeef1234", "deadbeef1234"), + (" abc1234 ", "abc1234"), # strip_whitespace=True strips before validation + ] + for git_hash, expected in cases: + with self.subTest(git_hash=git_hash): + code = Code(url="https://github.com/org/repo", commit_hash=git_hash) + self.assertEqual(code.commit_hash, expected) + + def test_git_hash_invalid(self): + """Invalid git hashes raise ValidationError""" + cases = [ + "abc123", # too short (6 chars) + "a" * 65, # too long (65 chars) + "xyz12345", # non-hex characters + ] + for git_hash in cases: + with self.subTest(git_hash=git_hash): + with self.assertRaises(ValidationError): + Code(url="https://github.com/org/repo", commit_hash=git_hash) + + if __name__ == "__main__": unittest.main()