diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a9b47d..b95345e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,9 @@ on: push: pull_request: +permissions: + contents: read + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -54,3 +57,71 @@ jobs: - name: Run checks run: make ci-py + + coverage: + name: Coverage + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' + needs: + - rust + - python + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Set up uv + uses: astral-sh/setup-uv@v7 + with: + version: "0.9.16" + enable-cache: true + + - name: Set up Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + components: llvm-tools-preview + + - name: Cargo cache + uses: Swatinem/rust-cache@v2 + + - name: Install cargo-llvm-cov + run: cargo install cargo-llvm-cov --locked + + - name: Run Python coverage + run: make py-coverage + + - name: Run Rust coverage + run: make rust-coverage + + - name: Upload to Codecov (pull requests) + if: github.event_name == 'pull_request' + uses: codecov/codecov-action@v5 + with: + files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false + verbose: true + + - name: Upload to Codecov (pushes) + if: github.event_name == 'push' + uses: codecov/codecov-action@v5 + with: + files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov + fail_ci_if_error: false + use_oidc: true + verbose: true + + - name: Upload coverage artifacts + uses: actions/upload-artifact@v4 + with: + name: coverage-reports + path: | + atompack-py/coverage + coverage diff --git a/Makefile b/Makefile index e7d7fd6..95f6a52 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,11 @@ ATOMPACK_PERF_COLOR ?= always override UV_CACHE_DIR := $(CURDIR)/.uv-cache .PHONY: help \ - rust-fmt rust-fmt-check rust-lint rust-test \ - py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks \ + rust-fmt rust-fmt-check rust-lint rust-test rust-coverage \ + py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks py-coverage \ perf-smoke-rust perf-smoke-py perf-smoke \ docs-sync docs-build docs \ - fmt fmt-check lint test \ + fmt fmt-check lint test coverage \ ci-rust ci-py ci help: @@ -33,6 +33,9 @@ help: @echo " make py-dev uv maturin develop (atompack-py)" @echo " make py-dev-release uv maturin develop -r (atompack-py)" @echo " make perf-smoke Run opt-in Rust + Python release throughput smoke tests" + @echo " make py-coverage uv pytest-cov core suite with XML + HTML reports" + @echo " make rust-coverage cargo llvm-cov workspace report in coverage/rust.lcov" + @echo " make coverage Run both Python and Rust coverage targets" @echo "" @echo "Docs:" @echo " make docs-sync Install docs deps (uv, atompack-py docs group)" @@ -51,6 +54,11 @@ rust-lint: rust-test: cargo test --workspace +rust-coverage: + @command -v cargo-llvm-cov >/dev/null 2>&1 || (echo "cargo-llvm-cov not found; install with 'cargo install cargo-llvm-cov'" && exit 1) + mkdir -p coverage + cargo llvm-cov --workspace --lcov --output-path coverage/rust.lcov + py-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --extra dev --locked @@ -96,6 +104,10 @@ perf-smoke-py: py-dev-release perf-smoke: perf-smoke-rust perf-smoke-py +py-coverage: py-dev + @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) + cd atompack-py && mkdir -p coverage && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked --with pytest-cov pytest tests --ignore=tests/benchmarks --cov=atompack --cov-report=term-missing --cov-report=xml:coverage/python-coverage.xml --cov-report=html:coverage/htmlcov + docs-sync: @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1) UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --project atompack-py --group docs --locked @@ -121,6 +133,8 @@ lint: rust-lint py-lint test: rust-test py-test +coverage: rust-coverage py-coverage + ci-rust: rust-fmt-check rust-lint rust-test ci-py: py-fmt-check py-lint py-test diff --git a/README.md b/README.md index e7af74e..fcb9b90 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # Atompack +

+ + Codecov + +

+ Append-only molecule storage for atomistic ML datasets. Atompack is a Python package plus Rust core crate for writing, reading, and distributing molecular diff --git a/atompack-py/python/atompack/__init__.pyi b/atompack-py/python/atompack/__init__.pyi index 67dddae..fd7b12f 100644 --- a/atompack-py/python/atompack/__init__.pyi +++ b/atompack-py/python/atompack/__init__.pyi @@ -534,7 +534,7 @@ class Database: Parameters ---------- index : int - Molecule index (0-based) + Molecule index (0-based). Negative indices are supported. Returns ------- @@ -549,7 +549,7 @@ class Database: Parameters ---------- indices : list of int - Molecule indices (0-based) + Molecule indices (0-based). Negative indices are supported. Returns ------- @@ -608,7 +608,7 @@ class Database: Parameters ---------- index : int - Molecule index (0-based) + Molecule index (0-based). Negative indices are supported. Returns ------- diff --git a/atompack-py/python/atompack/_atompack_rs.pyi b/atompack-py/python/atompack/_atompack_rs.pyi index 99e8730..3b4c284 100644 --- a/atompack-py/python/atompack/_atompack_rs.pyi +++ b/atompack-py/python/atompack/_atompack_rs.pyi @@ -475,7 +475,7 @@ class PyAtomDatabase: Parameters ---------- index : int - Molecule index (0-based) + Molecule index (0-based). Negative indices are supported. Returns ------- @@ -491,7 +491,7 @@ class PyAtomDatabase: Parameters ---------- indices : sequence of int - Molecule indices (0-based) + Molecule indices (0-based). Negative indices are supported. Returns ------- diff --git a/atompack-py/src/database.rs b/atompack-py/src/database.rs index f472cc9..0da931f 100644 --- a/atompack-py/src/database.rs +++ b/atompack-py/src/database.rs @@ -11,6 +11,31 @@ pub(crate) struct PyAtomDatabase { } impl PyAtomDatabase { + fn normalize_index(&self, index: isize) -> PyResult { + let len = self.inner.len(); + let normalized = if index < 0 { + (len as isize) + .checked_add(index) + .ok_or_else(|| PyIndexError::new_err("index underflow"))? + } else { + index + }; + if normalized < 0 || normalized >= len as isize { + return Err(PyIndexError::new_err(format!( + "Index {} out of bounds for database of length {}", + index, len + ))); + } + Ok(normalized as usize) + } + + fn normalize_indices(&self, indices: Vec) -> PyResult> { + indices + .into_iter() + .map(|index| self.normalize_index(index)) + .collect() + } + fn single_molecule_view(&self, py: Python<'_>, index: usize) -> PyResult { let compression = self.inner.compression(); let use_mmap = self.inner.get_compressed_slice(0).is_some(); @@ -211,15 +236,11 @@ impl PyAtomDatabase { } /// Get a molecule by index as a lazy view-backed molecule. - fn get_molecule(&self, py: Python<'_>, index: usize) -> PyResult { - let len = self.inner.len(); - if index >= len { - return Err(PyIndexError::new_err(format!( - "Index {} out of bounds for database of length {}", - index, len - ))); - } - Ok(PyMolecule::from_view(self.single_molecule_view(py, index)?)) + fn get_molecule(&self, py: Python<'_>, index: isize) -> PyResult { + let normalized = self.normalize_index(index)?; + Ok(PyMolecule::from_view( + self.single_molecule_view(py, normalized)?, + )) } /// Get multiple molecules by indices (parallel batch reading) @@ -232,10 +253,11 @@ impl PyAtomDatabase { /// /// Returns: /// - List of molecules - fn get_molecules(&self, py: Python<'_>, indices: Vec) -> PyResult> { + fn get_molecules(&self, py: Python<'_>, indices: Vec) -> PyResult> { if indices.is_empty() { return Ok(Vec::new()); } + let indices = self.normalize_indices(indices)?; let compression = self.inner.compression(); let use_mmap = self.inner.get_compressed_slice(0).is_some(); @@ -317,8 +339,9 @@ impl PyAtomDatabase { fn get_molecules_flat<'py>( &self, py: Python<'py>, - indices: Vec, + indices: Vec, ) -> PyResult> { + let indices = self.normalize_indices(indices)?; flat::get_molecules_flat_soa_impl(&self.inner, py, indices) } @@ -328,7 +351,7 @@ impl PyAtomDatabase { } /// Enable indexing: db[i] - fn __getitem__(&self, py: Python<'_>, index: usize) -> PyResult { + fn __getitem__(&self, py: Python<'_>, index: isize) -> PyResult { self.get_molecule(py, index) } diff --git a/atompack-py/tests/test_database.py b/atompack-py/tests/test_database.py index 6973049..de42ef1 100644 --- a/atompack-py/tests/test_database.py +++ b/atompack-py/tests/test_database.py @@ -248,6 +248,28 @@ def test_database_add_arrays_batch_roundtrip_with_custom_properties(tmp_path: Pa assert second.get_property("phase") == "valid" +def test_database_negative_indices_work_across_read_apis(tmp_path: Path) -> None: + path = tmp_path / "negative_indices.atp" + db = atompack.Database(str(path)) + db.add_molecules([_make_molecule(-1.0), _make_molecule(-2.0), _make_molecule(-3.0)]) + db.flush() + + reopened = atompack.Database.open(str(path)) + + assert reopened[-1].energy == pytest.approx(-3.0) + assert reopened.get_molecule(-2).energy == pytest.approx(-2.0) + assert [m.energy for m in reopened.get_molecules([-1, 0, -3])] == pytest.approx( + [-3.0, -1.0, -1.0] + ) + np.testing.assert_allclose( + reopened.get_molecules_flat([-1, -2])["energy"], + np.array([-3.0, -2.0], dtype=np.float64), + ) + + with pytest.raises(IndexError, match="out of bounds"): + reopened.get_molecule(-4) + + @pytest.mark.parametrize("mmap", [False, True]) @pytest.mark.parametrize("compression", ["none", "lz4", "zstd"]) def test_database_single_item_reads_are_view_compatible( @@ -596,19 +618,15 @@ def test_database_open_mmap_populate(tmp_path: Path) -> None: assert db_r[0].energy == pytest.approx(-3.0) -def test_database_negative_indexing_raises_overflow_error(tmp_path: Path) -> None: - # Database does not support negative indexing today. PyO3 extracts the - # index argument as `usize`, so a negative integer raises OverflowError - # at the FFI boundary. If wraparound semantics are ever added, this - # test will fail loudly so the intent is explicit. +def test_database_negative_indexing_out_of_bounds_raises_index_error(tmp_path: Path) -> None: path = tmp_path / "negidx.atp" db = atompack.Database(str(path)) db.add_molecule(_make_molecule(-1.0)) db.flush() db_r = atompack.Database.open(str(path)) - with pytest.raises(OverflowError, match=r"negative"): - _ = db_r[-1] + with pytest.raises(IndexError, match=r"out of bounds"): + _ = db_r[-2] def test_database_empty_molecule_roundtrip(tmp_path: Path) -> None: