Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions src/sdf_xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,21 +96,35 @@ def _process_latex_name(variable_name: str) -> str:
return variable_name


def _resolve_glob(path_glob: PathLike | Iterable[PathLike]):
"""
Normalise input path_glob into a sorted list of absolute, resolved Path objects.
def _resolve_glob(path_glob: PathLike | Iterable[PathLike]) -> list[Path]:
"""Resolve an input path/glob to sorted absolute SDF file paths.

Accepts either a single path-like glob pattern (e.g. ``"*.sdf"``) or a
list of path-like file names.
"""

try:
if isinstance(path_glob, PathLike):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about this change. I'd normally trust duck typing over direct type checks. Trying p = Path(path_glob) and catching the exception if it fails is probably safer.

p = Path(path_glob)
paths = list(p.parent.glob(p.name)) if p.name == "*.sdf" else list(p)
except TypeError:
if p.is_dir():
raise TypeError(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is less an issue of typing and more an issue of passing an incompatible value.

Suggested change
raise TypeError(
raise ValueError(

"Directory paths are not supported; pass a glob pattern"
f"(e.g. {(p / '*.sdf').as_posix()}) or a list of file paths"
f"(e.g. {[(p / '0000.sdf').as_posix(), (p / '0001.sdf').as_posix()]})."
)
paths = list(p.parent.glob(p.name))
else:
paths = list({Path(p) for p in path_glob})

paths = sorted(p.resolve() for p in paths)
if not paths:
resolved_paths = sorted(p.resolve() for p in paths)
if not resolved_paths:
raise FileNotFoundError(f"No files matched pattern or input: {path_glob!r}")
return paths

for p in resolved_paths:
if not p.is_file():
raise FileNotFoundError(f"{p.as_posix()} does not exist or is not a file.")
if p.suffix.lower() != ".sdf":
raise FileNotFoundError(f"{p.as_posix()} is not an SDF file.")
Comment on lines +125 to +126
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it matter if users are renaming their sdf files? The previous behaviour was to enforce the use of .sdf as a file extension, so happy for you to keep this if you prefer, but I'd be tempted to let people pass whatever filenames they want.

return resolved_paths


def _build_datatree_from_dataset(
Expand Down
12 changes: 12 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ def test_resolve_glob_from_string_pattern():
assert result == expected


def test_resolve_glob_from_multiple_names_string_pattern(tmp_path):
mock_test_files_dir = tmp_path
(mock_test_files_dir / "normal_0000.sdf").touch()
(mock_test_files_dir / "normal_0001.sdf").touch()
(mock_test_files_dir / "other_0000.sdf").touch()

pattern = str(mock_test_files_dir / "normal_*.sdf")
result = _resolve_glob(pattern)
expected = sorted(mock_test_files_dir.glob("normal_*.sdf"))
assert result == expected


def test_resolve_glob_from_path_glob():
pattern = TEST_FILES_DIR.glob("*.sdf")
result = _resolve_glob(pattern)
Expand Down
Loading