diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a9b47d..b95345e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,9 @@ on: push: pull_request: +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -54,3 +57,71 @@ jobs: - name: Run checks run: make ci-py + + coverage: + name: Coverage + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' + needs: + - rust + - python + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + with: + version: "0.9.16" + enable-cache: true + + - name: Set up Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + components: llvm-tools-preview + + - name: Cargo cache + uses: Swatinem/rust-cache@v2 + + - name: Install cargo-llvm-cov + run: cargo install cargo-llvm-cov --locked + + - name: Run Python coverage + run: make py-coverage + + - name: Run Rust coverage + run: make rust-coverage + + - name: Upload to Codecov (pull requests) + if: github.event_name == 'pull_request' + uses: codecov/codecov-action@v5 + with: + files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false + verbose: true + + - name: Upload to Codecov (pushes) + if: github.event_name == 'push' + uses: codecov/codecov-action@v5 + with: + files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov + fail_ci_if_error: false + use_oidc: true + verbose: true + + - name: Upload coverage artifacts + uses: actions/upload-artifact@v4 + with: + name: coverage-reports + path: | + atompack-py/coverage + coverage diff --git a/Makefile b/Makefile index e7d7fd6..95f6a52 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,11 @@ 
ATOMPACK_PERF_COLOR ?= always override UV_CACHE_DIR := $(CURDIR)/.uv-cache .PHONY: help \ - rust-fmt rust-fmt-check rust-lint rust-test \ - py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks \ + rust-fmt rust-fmt-check rust-lint rust-test rust-coverage \ + py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks py-coverage \ perf-smoke-rust perf-smoke-py perf-smoke \ docs-sync docs-build docs \ - fmt fmt-check lint test \ + fmt fmt-check lint test coverage \ ci-rust ci-py ci help: @@ -33,6 +33,9 @@ help: @echo " make py-dev uv maturin develop (atompack-py)" @echo " make py-dev-release uv maturin develop -r (atompack-py)" @echo " make perf-smoke Run opt-in Rust + Python release throughput smoke tests" + @echo " make py-coverage uv pytest-cov core suite with XML + HTML reports" + @echo " make rust-coverage cargo llvm-cov workspace report in coverage/rust.lcov" + @echo " make coverage Run both Python and Rust coverage targets" @echo "" @echo "Docs:" @echo " make docs-sync Install docs deps (uv, atompack-py docs group)" @@ -51,6 +54,11 @@ rust-lint: rust-test: cargo test --workspace +rust-coverage: + @command -v cargo-llvm-cov >/dev/null 2>&1 || (echo "cargo-llvm-cov not found; install with 'cargo install cargo-llvm-cov'" && exit 1) + mkdir -p coverage + cargo llvm-cov --workspace --lcov --output-path coverage/rust.lcov + py-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --extra dev --locked @@ -96,6 +104,10 @@ perf-smoke-py: py-dev-release perf-smoke: perf-smoke-rust perf-smoke-py +py-coverage: py-dev + @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) + cd atompack-py && mkdir -p coverage && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked --with pytest-cov pytest tests 
--ignore=tests/benchmarks --cov=atompack --cov-report=term-missing --cov-report=xml:coverage/python-coverage.xml --cov-report=html:coverage/htmlcov + docs-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --project atompack-py --group docs --locked @@ -121,6 +133,8 @@ lint: rust-lint py-lint test: rust-test py-test +coverage: rust-coverage py-coverage + ci-rust: rust-fmt-check rust-lint rust-test ci-py: py-fmt-check py-lint py-test diff --git a/README.md b/README.md index e7af74e..fcb9b90 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # Atompack +

+ + Codecov + +

def _database_iter_batches(
    database: "Database",
    batch_size: int,
    *,
    flat: bool = False,
    drop_last: bool = False,
) -> "Iterator[list[Molecule] | dict[str, Any]]":
    """Yield contiguous, index-ordered batches from ``database``.

    Args:
        database: Object supporting ``len()`` plus ``get_molecules`` and
            ``get_molecules_flat``; the selected getter is called with a list
            of consecutive integer indices for each batch.
        batch_size: Maximum number of entries per batch; must be positive.
        flat: When true, fetch each batch with ``get_molecules_flat`` instead
            of ``get_molecules``.
        drop_last: When true, skip a trailing batch that holds fewer than
            ``batch_size`` entries.

    Returns:
        An iterator over whatever the selected getter returns for each batch.

    Raises:
        ValueError: If ``batch_size`` is not positive. Raised when the
            function is called, not on the first ``next()``.
    """
    # The annotations above are quoted so they are not evaluated at import
    # time: a runtime ``X | Y`` between generic aliases needs Python 3.10+.

    # Validate here, outside the generator body, so a bad argument fails at
    # the call site instead of wherever the iterator is first consumed.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    def _batches():
        getter = database.get_molecules_flat if flat else database.get_molecules
        # Take the length once so every batch boundary is computed against
        # the same snapshot that ``range`` uses.
        total = len(database)
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            if drop_last and stop - start < batch_size:
                break
            yield getter(list(range(start, stop)))

    return _batches()
def iter_batches(
    self,
    batch_size: int,
    *,
    flat: bool = False,
    drop_last: bool = False,
) -> Iterator[list[Molecule] | dict[str, Any]]:
    """
    Yield contiguous batches from the database.

    Batches cover indices ``0 .. len(self) - 1`` in order, ``batch_size``
    entries at a time; the final batch may be shorter.

    Set ``flat=True`` to yield ``get_molecules_flat`` payloads instead of
    materialized Molecule objects.

    Set ``drop_last=True`` to skip a final batch that holds fewer than
    ``batch_size`` entries.

    Raises ``ValueError`` if ``batch_size`` is not a positive integer.
    """
    ...
def test_public_stub_exposes_flat_batch_reader() -> None:
    """Check that the public ``Database`` stub declares the batch readers."""
    database_methods = _class_method_names(PUBLIC_STUB, "Database")
    # Compute the difference explicitly so a failure names the missing
    # method(s) instead of only reporting that a subset check was false.
    missing = {"get_molecules_flat", "iter_batches"} - set(database_methods)
    assert not missing, f"Database stub is missing methods: {sorted(missing)}"