Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/components/identifiers.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Code or script identifier
| `url` | `str` | Code URL (URL to code repository) |
| `name` | `Optional[str]` | Name |
| `version` | `Optional[str]` | Code version |
| `commit_hash` | `Optional[str]` | Commit hash (Commit hash of the code.) |
| `container` | Optional[[Container](#container)] | Container |
| `run_script` | `Optional[pathlib.Path]` | Run script (Path to run script) |
| `language` | `Optional[str]` | Programming language (Programming language used) |
Expand Down
26 changes: 25 additions & 1 deletion src/aind_data_schema/components/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from typing import Dict, List, Optional

from aind_data_schema_models.registries import Registry
from pydantic import Field
from pydantic import Field, model_validator
import warnings

from typing import Annotated
from pydantic import StringConstraints
from aind_data_schema.base import DataModel, DiscriminatedList, GenericModel


Expand Down Expand Up @@ -67,12 +70,22 @@ class Container(DataModel):
uri: str = Field(..., title="URI", description="URI of the container, e.g. Docker Hub URL")


# String type for commit identifiers: surrounding whitespace is stripped and the
# value must consist of 7 to 60 hexadecimal digits (upper or lower case).
# NOTE(review): a full SHA-256 git object id is 64 hex digits and would be
# rejected by the 60-character cap -- confirm the upper bound is intended.
CommitHash = Annotated[
    str,
    StringConstraints(strip_whitespace=True, pattern=r"^[0-9a-fA-F]{7,60}$"),
]


class Code(DataModel):
"""Code or script identifier"""

url: str = Field(..., title="Code URL", description="URL to code repository")
name: Optional[str] = Field(default=None, title="Name")
version: Optional[str] = Field(default=None, title="Code version")
commit_hash: Optional[CommitHash] = Field(default=None, title="Commit hash", description="Commit hash of the code.")

container: Optional[Container] = Field(default=None, title="Container")
run_script: Optional[Path] = Field(default=None, title="Run script", description="Path to run script")
Comment thread
bruno-f-cruz marked this conversation as resolved.
Expand All @@ -92,3 +105,14 @@ class Code(DataModel):
title="Core dependency",
description="For code with a core software package dependency, e.g. Bonsai",
)

@model_validator(mode="after")
def _ensure_commit_hash_or_version(self) -> "Code":
    """Warn when neither commit_hash nor version is set on this Code instance.

    This check never rejects the model: it only emits a warning and always
    returns the instance unchanged.
    """
    # Guard clause: having either identifier is enough to stay silent.
    if self.commit_hash or self.version:
        return self

    message = (
        "Neither commit_hash nor version provided for Code. "
        "It's recommended to provide at least one to ensure reproducibility. "
        "In the future, we will require at least one of these fields."
    )
    warnings.warn(message)
    return self
33 changes: 32 additions & 1 deletion tests/test_identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from pydantic import ValidationError

from aind_data_schema.components.identifiers import Person
from aind_data_schema.components.identifiers import Code, Person


class Testexperimenter(unittest.TestCase):
Expand All @@ -23,5 +23,36 @@ def test_experimenter_missing_fields(self):
Person()


class TestGitHash(unittest.TestCase):
    """Exercise CommitHash validation through the Code model's commit_hash field"""

    def test_git_hash_valid(self):
        """Well-formed hashes are accepted and stored as the expected value"""
        repo_url = "https://github.com/org/repo"
        # Maps the raw input to the value expected on the model afterwards.
        accepted = {
            "abc1234": "abc1234",
            "a" * 40: "a" * 40,
            "a" * 60: "a" * 60,
            "aBcDeF1": "aBcDeF1",
            "deadbeef1234": "deadbeef1234",
            # strip_whitespace=True strips the padding before validation
            " abc1234 ": "abc1234",
        }
        for raw, stored in accepted.items():
            with self.subTest(git_hash=raw):
                model = Code(url=repo_url, commit_hash=raw)
                self.assertEqual(model.commit_hash, stored)

    def test_git_hash_invalid(self):
        """Malformed hashes are rejected with a ValidationError"""
        repo_url = "https://github.com/org/repo"
        rejected = (
            "abc123",  # too short (6 chars)
            "a" * 61,  # too long (61 chars)
            "xyz12345",  # non-hex characters
        )
        for raw in rejected:
            with self.subTest(git_hash=raw), self.assertRaises(ValidationError):
                Code(url=repo_url, commit_hash=raw)


if __name__ == "__main__":
unittest.main()
Loading