From 2b7e81490ea70401301b1a4c58fd12a9249e8680 Mon Sep 17 00:00:00 2001 From: bruno-f-cruz <7049351+bruno-f-cruz@users.noreply.github.com> Date: Fri, 1 May 2026 15:53:19 -0700 Subject: [PATCH 1/8] Add git hash to Code model --- .../components/identifiers.py | 15 +++++++++ tests/test_identifiers.py | 32 ++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/aind_data_schema/components/identifiers.py b/src/aind_data_schema/components/identifiers.py index 5a3b2c3f8..7a4ede415 100644 --- a/src/aind_data_schema/components/identifiers.py +++ b/src/aind_data_schema/components/identifiers.py @@ -7,6 +7,9 @@ from aind_data_schema_models.registries import Registry from pydantic import Field + +from typing import Annotated +from pydantic import StringConstraints from aind_data_schema.base import DataModel, DiscriminatedList, GenericModel @@ -67,12 +70,24 @@ class Container(DataModel): uri: str = Field(..., title="URI", description="URI of the container, e.g. Docker Hub URL") +GitHash = Annotated[ + str, + StringConstraints( + pattern=r"^[0-9a-fA-F]{7,40}$", + strip_whitespace=True, + ), +] + + class Code(DataModel): """Code or script identifier""" url: str = Field(..., title="Code URL", description="URL to code repository") name: Optional[str] = Field(default=None, title="Name") version: Optional[str] = Field(default=None, title="Code version") + git_hash: Optional[GitHash] = Field( + default=None, title="Git hash", description="Git hash of the code repository state" + ) container: Optional[Container] = Field(default=None, title="Container") run_script: Optional[Path] = Field(default=None, title="Run script", description="Path to run script") diff --git a/tests/test_identifiers.py b/tests/test_identifiers.py index 780ae0d4d..332fdf93b 100644 --- a/tests/test_identifiers.py +++ b/tests/test_identifiers.py @@ -4,7 +4,7 @@ from pydantic import ValidationError -from aind_data_schema.components.identifiers import Person +from aind_data_schema.components.identifiers import Code, Person class Testexperimenter(unittest.TestCase): @@ -23,5 +23,35 @@ def test_experimenter_missing_fields(self): Person() +class TestGitHash(unittest.TestCase): + """Test GitHash type validation via Code model""" + + def test_git_hash_valid(self): + """Valid git hashes are accepted and stored correctly""" + cases = [ + ("abc1234", "abc1234"), + ("a" * 40, "a" * 40), + ("aBcDeF1", "aBcDeF1"), + ("deadbeef1234", "deadbeef1234"), + (" abc1234 ", "abc1234"), # strip_whitespace=True strips before validation + ] + for git_hash, expected in cases: + with self.subTest(git_hash=git_hash): + code = Code(url="https://github.com/org/repo", git_hash=git_hash) + self.assertEqual(code.git_hash, expected) + + def test_git_hash_invalid(self): + """Invalid git hashes raise ValidationError""" + cases = [ + "abc123", # too short (6 chars) + "a" * 41, # too long (41 chars) + "xyz12345", # non-hex characters + ] + for git_hash in cases: + with self.subTest(git_hash=git_hash): + with self.assertRaises(ValidationError): + Code(url="https://github.com/org/repo", git_hash=git_hash) + + if __name__ == "__main__": unittest.main() From c0f0825dac93f2c1e3a451c8a1b8605b612f9f6f Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 22:54:26 +0000 Subject: [PATCH 2/8] update docs --- docs/source/components/identifiers.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/components/identifiers.md b/docs/source/components/identifiers.md index b20bf0b68..48ca656c3 100644 --- a/docs/source/components/identifiers.md +++ b/docs/source/components/identifiers.md @@ -11,6 +11,7 @@ Code or script identifier | `url` | `str` | Code URL (URL to code repository) | | `name` | `Optional[str]` | Name | | `version` | `Optional[str]` | Code version | +| `git_hash` | `Optional[str]` | Git hash (Git hash of the code repository state) | | `container` | Optional[[Container](#container)] | Container | | `run_script` | `Optional[pathlib.Path]` | Run script (Path to run script) | | `language` | `Optional[str]` | Programming language (Programming language used) | From acedf2527a408ff8767439fd61cd219ae692aaa4 Mon Sep 17 00:00:00 2001 From: bruno-f-cruz <7049351+bruno-f-cruz@users.noreply.github.com> Date: Tue, 5 May 2026 15:43:49 -0700 Subject: [PATCH 3/8] Increase hash length upper bound --- src/aind_data_schema/components/identifiers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aind_data_schema/components/identifiers.py b/src/aind_data_schema/components/identifiers.py index 7a4ede415..00368402d 100644 --- a/src/aind_data_schema/components/identifiers.py +++ b/src/aind_data_schema/components/identifiers.py @@ -73,7 +73,7 @@ class Container(DataModel): GitHash = Annotated[ str, StringConstraints( - pattern=r"^[0-9a-fA-F]{7,40}$", + pattern=r"^[0-9a-fA-F]{7,60}$", strip_whitespace=True, ), ] From 270ba7664d3e5b06f4d6592caaaad74c00333495 Mon Sep 17 00:00:00 2001 From: bruno-f-cruz <7049351+bruno-f-cruz@users.noreply.github.com> Date: Tue, 5 May 2026 15:52:29 -0700 Subject: [PATCH 4/8] Change property name and emit warning if hash and version are not defined --- src/aind_data_schema/components/identifiers.py | 17 ++++++++++++----- tests/test_identifiers.py | 6 +++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/aind_data_schema/components/identifiers.py b/src/aind_data_schema/components/identifiers.py index 00368402d..20fd3c85f 100644 --- a/src/aind_data_schema/components/identifiers.py +++ b/src/aind_data_schema/components/identifiers.py @@ -5,8 +5,8 @@ from typing import Dict, List, Optional from aind_data_schema_models.registries import Registry -from pydantic import Field - +from pydantic import Field, model_validator +import warnings from typing import Annotated from pydantic import StringConstraints @@ -85,9 +85,7 @@ class Code(DataModel): url: str = Field(..., title="Code URL", description="URL to code repository") name: Optional[str] = Field(default=None, title="Name") version: Optional[str] = Field(default=None, title="Code version") - git_hash: Optional[GitHash] = Field( - default=None, title="Git hash", description="Git hash of the code repository state" - ) + commit_hash: Optional[GitHash] = Field(default=None, title="Commit hash", description="Commit hash of the code.") container: Optional[Container] = Field(default=None, title="Container") run_script: Optional[Path] = Field(default=None, title="Run script", description="Path to run script") @@ -107,3 +105,12 @@ class Code(DataModel): title="Core dependency", description="For code with a core software package dependency, e.g. Bonsai", ) + + @model_validator(mode="after") + def _ensure_commit_hash_or_version(self) -> "Code": + """Ensure that at least one of commit_hash or version is provided for code identification""" + if not self.commit_hash and not self.version: + warnings.warn( + "Neither commit_hash nor version provided for Code. It's recommended to provide at least one to ensure reproducibility. In the future, we will require at least one of these fields." + ) + return self diff --git a/tests/test_identifiers.py b/tests/test_identifiers.py index 332fdf93b..4df6176d7 100644 --- a/tests/test_identifiers.py +++ b/tests/test_identifiers.py @@ -37,8 +37,8 @@ def test_git_hash_valid(self): ] for git_hash, expected in cases: with self.subTest(git_hash=git_hash): - code = Code(url="https://github.com/org/repo", git_hash=git_hash) - self.assertEqual(code.git_hash, expected) + code = Code(url="https://github.com/org/repo", commit_hash=git_hash) + self.assertEqual(code.commit_hash, expected) def test_git_hash_invalid(self): """Invalid git hashes raise ValidationError""" @@ -50,7 +50,7 @@ def test_git_hash_invalid(self): for git_hash in cases: with self.subTest(git_hash=git_hash): with self.assertRaises(ValidationError): - Code(url="https://github.com/org/repo", git_hash=git_hash) + Code(url="https://github.com/org/repo", commit_hash=git_hash) if __name__ == "__main__": From 449e85c74de86ad1e9980490092aacb81c586479 Mon Sep 17 00:00:00 2001 From: bruno-f-cruz <7049351+bruno-f-cruz@users.noreply.github.com> Date: Tue, 5 May 2026 15:53:25 -0700 Subject: [PATCH 5/8] Refactor type alias --- src/aind_data_schema/components/identifiers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aind_data_schema/components/identifiers.py b/src/aind_data_schema/components/identifiers.py index 20fd3c85f..9a9704eef 100644 --- a/src/aind_data_schema/components/identifiers.py +++ b/src/aind_data_schema/components/identifiers.py @@ -70,7 +70,7 @@ class Container(DataModel): uri: str = Field(..., title="URI", description="URI of the container, e.g. Docker Hub URL") -GitHash = Annotated[ +CommitHash = Annotated[ str, StringConstraints( pattern=r"^[0-9a-fA-F]{7,60}$", @@ -85,7 +85,7 @@ class Code(DataModel): url: str = Field(..., title="Code URL", description="URL to code repository") name: Optional[str] = Field(default=None, title="Name") version: Optional[str] = Field(default=None, title="Code version") - commit_hash: Optional[GitHash] = Field(default=None, title="Commit hash", description="Commit hash of the code.") + commit_hash: Optional[CommitHash] = Field(default=None, title="Commit hash", description="Commit hash of the code.") container: Optional[Container] = Field(default=None, title="Container") run_script: Optional[Path] = Field(default=None, title="Run script", description="Path to run script") From ec9431ff2749575baa254644117c804e4012f55c Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 22:54:13 +0000 Subject: [PATCH 6/8] update docs --- docs/source/components/identifiers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/components/identifiers.md b/docs/source/components/identifiers.md index 48ca656c3..b26cc6f62 100644 --- a/docs/source/components/identifiers.md +++ b/docs/source/components/identifiers.md @@ -11,7 +11,7 @@ Code or script identifier | `url` | `str` | Code URL (URL to code repository) | | `name` | `Optional[str]` | Name | | `version` | `Optional[str]` | Code version | -| `git_hash` | `Optional[str]` | Git hash (Git hash of the code repository state) | +| `commit_hash` | `Optional[str]` | Commit hash (Commit hash of the code.) | | `container` | Optional[[Container](#container)] | Container | | `run_script` | `Optional[pathlib.Path]` | Run script (Path to run script) | | `language` | `Optional[str]` | Programming language (Programming language used) | From d578f979e0c61a5eb18fcac65b500a3f6474b6fd Mon Sep 17 00:00:00 2001 From: bruno-f-cruz <7049351+bruno-f-cruz@users.noreply.github.com> Date: Tue, 5 May 2026 16:10:23 -0700 Subject: [PATCH 7/8] Break lines --- src/aind_data_schema/components/identifiers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/aind_data_schema/components/identifiers.py b/src/aind_data_schema/components/identifiers.py index 9a9704eef..428dc1fb5 100644 --- a/src/aind_data_schema/components/identifiers.py +++ b/src/aind_data_schema/components/identifiers.py @@ -111,6 +111,8 @@ def _ensure_commit_hash_or_version(self) -> "Code": """Ensure that at least one of commit_hash or version is provided for code identification""" if not self.commit_hash and not self.version: warnings.warn( - "Neither commit_hash nor version provided for Code. It's recommended to provide at least one to ensure reproducibility. In the future, we will require at least one of these fields." + "Neither commit_hash nor version provided for Code. " + "It's recommended to provide at least one to ensure reproducibility. " + "In the future, we will require at least one of these fields." ) return self From 4d907f906c582547faee1a214eeb91e03cf68bad Mon Sep 17 00:00:00 2001 From: bruno-f-cruz <7049351+bruno-f-cruz@users.noreply.github.com> Date: Tue, 5 May 2026 16:11:52 -0700 Subject: [PATCH 8/8] Update tests to reflect longer hashes --- tests/test_identifiers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_identifiers.py b/tests/test_identifiers.py index 4df6176d7..155341ab3 100644 --- a/tests/test_identifiers.py +++ b/tests/test_identifiers.py @@ -31,6 +31,7 @@ def test_git_hash_valid(self): cases = [ ("abc1234", "abc1234"), ("a" * 40, "a" * 40), + ("a" * 60, "a" * 60), ("aBcDeF1", "aBcDeF1"), ("deadbeef1234", "deadbeef1234"), (" abc1234 ", "abc1234"), # strip_whitespace=True strips before validation @@ -44,7 +45,7 @@ def test_git_hash_invalid(self): """Invalid git hashes raise ValidationError""" cases = [ "abc123", # too short (6 chars) - "a" * 41, # too long (41 chars) + "a" * 61, # too long (61 chars) "xyz12345", # non-hex characters ] for git_hash in cases: