diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a9b47d..b95345e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,9 @@ on: push: pull_request: +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -54,3 +57,71 @@ jobs: - name: Run checks run: make ci-py + + coverage: + name: Coverage + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' + needs: + - rust + - python + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + with: + version: "0.9.16" + enable-cache: true + + - name: Set up Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + components: llvm-tools-preview + + - name: Cargo cache + uses: Swatinem/rust-cache@v2 + + - name: Install cargo-llvm-cov + run: cargo install cargo-llvm-cov --locked + + - name: Run Python coverage + run: make py-coverage + + - name: Run Rust coverage + run: make rust-coverage + + - name: Upload to Codecov (pull requests) + if: github.event_name == 'pull_request' + uses: codecov/codecov-action@v5 + with: + files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false + verbose: true + + - name: Upload to Codecov (pushes) + if: github.event_name == 'push' + uses: codecov/codecov-action@v5 + with: + files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov + fail_ci_if_error: false + use_oidc: true + verbose: true + + - name: Upload coverage artifacts + uses: actions/upload-artifact@v4 + with: + name: coverage-reports + path: | + atompack-py/coverage + coverage diff --git a/Makefile b/Makefile index e7d7fd6..95f6a52 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,11 @@ ATOMPACK_PERF_COLOR ?= always override UV_CACHE_DIR := $(CURDIR)/.uv-cache .PHONY: help \ - rust-fmt rust-fmt-check rust-lint rust-test \ - py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks \ + rust-fmt rust-fmt-check rust-lint rust-test rust-coverage \ + py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks py-coverage \ perf-smoke-rust perf-smoke-py perf-smoke \ docs-sync docs-build docs \ - fmt fmt-check lint test \ + fmt fmt-check lint test coverage \ ci-rust ci-py ci help: @@ -33,6 +33,9 @@ help: @echo " make py-dev uv maturin develop (atompack-py)" @echo " make py-dev-release uv maturin develop -r (atompack-py)" @echo " make perf-smoke Run opt-in Rust + Python release throughput smoke tests" + @echo " make py-coverage uv pytest-cov core suite with XML + HTML reports" + @echo " make rust-coverage cargo llvm-cov workspace report in coverage/rust.lcov" + @echo " make coverage Run both Python and Rust coverage targets" @echo "" @echo "Docs:" @echo " make docs-sync Install docs deps (uv, atompack-py docs group)" @@ -51,6 +54,11 @@ rust-lint: rust-test: cargo test --workspace +rust-coverage: + @command -v cargo-llvm-cov >/dev/null 2>&1 || (echo "cargo-llvm-cov not found; install with 'cargo install cargo-llvm-cov'" && exit 1) + mkdir -p coverage + cargo llvm-cov --workspace --lcov --output-path coverage/rust.lcov + py-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --extra dev --locked @@ -96,6 +104,10 @@ perf-smoke-py: py-dev-release perf-smoke: perf-smoke-rust perf-smoke-py +py-coverage: py-dev + @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) + cd atompack-py && mkdir -p coverage && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked --with pytest-cov pytest tests --ignore=tests/benchmarks --cov=atompack --cov-report=term-missing --cov-report=xml:coverage/python-coverage.xml --cov-report=html:coverage/htmlcov + docs-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --project atompack-py --group docs --locked @@ -121,6 +133,8 @@ lint: rust-lint py-lint test: rust-test py-test +coverage: rust-coverage py-coverage + ci-rust: rust-fmt-check rust-lint rust-test ci-py: py-fmt-check py-lint py-test diff --git a/README.md b/README.md index e7af74e..fcb9b90 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # Atompack +

+ + Codecov + +

+ Append-only molecule storage for atomistic ML datasets. Atompack is a Python package plus Rust core crate for writing, reading, and distributing molecular diff --git a/atompack-py/python/atompack/hub.py b/atompack-py/python/atompack/hub.py index 0fb8a19..bd4defe 100644 --- a/atompack-py/python/atompack/hub.py +++ b/atompack-py/python/atompack/hub.py @@ -171,9 +171,18 @@ def __len__(self) -> int: self._ensure_open() return self._total_length - def __getitem__(self, index: int) -> Molecule: + def __getitem__(self, index: int | slice) -> Molecule | list[Molecule]: + if isinstance(index, slice): + self._ensure_open() + start, stop, step = index.indices(self._total_length) + return self.get_molecules(list(range(start, stop, step))) return self.get_molecule(index) + def __iter__(self): + self._ensure_open() + for index in range(self._total_length): + yield self.get_molecule(index) + def get_molecule(self, index: int) -> Molecule: db_index, local_index = self._locate(index) return self._databases[db_index][local_index] diff --git a/atompack-py/python/atompack/hub.pyi b/atompack-py/python/atompack/hub.pyi index 134283d..7aed48e 100644 --- a/atompack-py/python/atompack/hub.pyi +++ b/atompack-py/python/atompack/hub.pyi @@ -4,7 +4,7 @@ from __future__ import annotations from pathlib import Path from types import TracebackType -from typing import Any, Sequence +from typing import Any, Iterator, Sequence, overload from . import Molecule @@ -26,6 +26,7 @@ class AtompackReader: def __len__(self) -> int: """Return the total number of molecules across all opened files.""" ... + @overload def __getitem__(self, index: int) -> Molecule: """ Fetch one molecule by index. @@ -34,6 +35,8 @@ class AtompackReader: dataset, not within a single shard. """ ... + @overload + def __getitem__(self, index: slice) -> list[Molecule]: ... def get_molecule(self, index: int) -> Molecule: """ Fetch one molecule by global index across the underlying shard set. @@ -75,6 +78,9 @@ class AtompackReader: def close(self) -> None: """Close the underlying databases and invalidate the reader.""" ... + def __iter__(self) -> Iterator[Molecule]: + """Iterate over molecules in logical reader order.""" + ... def download( repo_id: str, diff --git a/atompack-py/tests/test_hub.py b/atompack-py/tests/test_hub.py index c7db2a0..fd7700a 100644 --- a/atompack-py/tests/test_hub.py +++ b/atompack-py/tests/test_hub.py @@ -415,6 +415,21 @@ def test_open_path_directory_flattens_lexicographically(tmp_path: Path) -> None: assert [reader[i].energy for i in range(len(reader))] == pytest.approx([-1.0, -2.0, -3.0]) +def test_reader_supports_iteration_and_slices(tmp_path: Path) -> None: + shard_dir = tmp_path / "shards" + shard_dir.mkdir() + _make_db(shard_dir / "a.atp", [-1.0, -2.0]) + _make_db(shard_dir / "b.atp", [-3.0, -4.0]) + + reader = atompack.hub.open_path(shard_dir) + + assert [molecule.energy for molecule in reader] == pytest.approx([-1.0, -2.0, -3.0, -4.0]) + assert [molecule.energy for molecule in reader[1:4:2]] == pytest.approx([-2.0, -4.0]) + assert [molecule.energy for molecule in reader[::-1]] == pytest.approx( + [-4.0, -3.0, -2.0, -1.0] + ) + + def test_open_path_context_manager_closes_reader(tmp_path: Path) -> None: source = tmp_path / "single.atp" _make_db(source, [-1.0]) diff --git a/atompack-py/tests/test_stub_surface.py b/atompack-py/tests/test_stub_surface.py index b883fcc..f4cc377 100644 --- a/atompack-py/tests/test_stub_surface.py +++ b/atompack-py/tests/test_stub_surface.py @@ -76,6 +76,9 @@ def test_public_stub_exposes_flat_batch_reader() -> None: def test_hub_stub_has_public_docstrings() -> None: + reader_methods = _class_method_names(HUB_STUB, "AtompackReader") + assert {"__getitem__", "__iter__"} <= reader_methods + reader_doc = _class_docstring(HUB_STUB, "AtompackReader") or "" assert "lexicographically ordered shard set" in reader_doc