diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4a9b47d..b95345e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,6 +4,9 @@ on:
push:
pull_request:
+permissions:  # workflow-wide default; a job that needs more declares its own permissions block
+ contents: read
+
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
@@ -54,3 +57,71 @@ jobs:
- name: Run checks
run: make ci-py
+
+ coverage:  # builds Python + Rust coverage reports, uploads them to Codecov and as artifacts
+ name: Coverage
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main'  # every PR, plus runs on main
+ needs:  # wait for the rust and python jobs before spending time on coverage
+ - rust
+ - python
+ permissions:
+ contents: read
+ id-token: write  # lets the push upload step below authenticate to Codecov via OIDC
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+
+ - name: Set up uv
+ uses: astral-sh/setup-uv@v7
+ with:
+ version: "0.9.16"
+ enable-cache: true
+
+ - name: Set up Rust
+ uses: actions-rust-lang/setup-rust-toolchain@v1
+ with:
+ toolchain: stable
+ components: llvm-tools-preview  # LLVM coverage tooling that cargo-llvm-cov drives
+
+ - name: Cargo cache
+ uses: Swatinem/rust-cache@v2
+
+ - name: Install cargo-llvm-cov
+ run: cargo install cargo-llvm-cov --locked  # provides the `cargo llvm-cov` subcommand used by `make rust-coverage`
+
+ - name: Run Python coverage
+ run: make py-coverage
+
+ - name: Run Rust coverage
+ run: make rust-coverage
+
+ - name: Upload to Codecov (pull requests)
+ if: github.event_name == 'pull_request'  # PR uploads authenticate with the repository token
+ uses: codecov/codecov-action@v5
+ with:
+ files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov
+ token: ${{ secrets.CODECOV_TOKEN }}
+ fail_ci_if_error: false  # an upload problem must not fail the job
+ verbose: true
+
+ - name: Upload to Codecov (pushes)
+ if: github.event_name == 'push'  # pushes authenticate with OIDC instead of a token
+ uses: codecov/codecov-action@v5
+ with:
+ files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov
+ fail_ci_if_error: false
+ use_oidc: true  # relies on the job-level `id-token: write` permission
+ verbose: true
+
+ - name: Upload coverage artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-reports  # the report directories written by the two make targets
+ path: |
+ atompack-py/coverage
+ coverage
diff --git a/Makefile b/Makefile
index e7d7fd6..95f6a52 100644
--- a/Makefile
+++ b/Makefile
@@ -7,11 +7,11 @@ ATOMPACK_PERF_COLOR ?= always
override UV_CACHE_DIR := $(CURDIR)/.uv-cache
.PHONY: help \
- rust-fmt rust-fmt-check rust-lint rust-test \
- py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks \
+ rust-fmt rust-fmt-check rust-lint rust-test rust-coverage \
+ py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks py-coverage \
perf-smoke-rust perf-smoke-py perf-smoke \
docs-sync docs-build docs \
- fmt fmt-check lint test \
+ fmt fmt-check lint test coverage \
ci-rust ci-py ci
help:
@@ -33,6 +33,9 @@ help:
@echo " make py-dev uv maturin develop (atompack-py)"
@echo " make py-dev-release uv maturin develop -r (atompack-py)"
@echo " make perf-smoke Run opt-in Rust + Python release throughput smoke tests"
+ @echo " make py-coverage uv pytest-cov core suite with XML + HTML reports"
+ @echo " make rust-coverage cargo llvm-cov workspace report in coverage/rust.lcov"
+ @echo " make coverage Run both Python and Rust coverage targets"
@echo ""
@echo "Docs:"
@echo " make docs-sync Install docs deps (uv, atompack-py docs group)"
@@ -51,6 +54,11 @@ rust-lint:
rust-test:
cargo test --workspace
+rust-coverage:  # writes an LCOV report for the whole cargo workspace to coverage/rust.lcov
+ @command -v cargo-llvm-cov >/dev/null 2>&1 || (echo "cargo-llvm-cov not found; install with 'cargo install cargo-llvm-cov'" && exit 1)
+ mkdir -p coverage
+ cargo llvm-cov --workspace --lcov --output-path coverage/rust.lcov
+
py-sync:
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --extra dev --locked
@@ -96,6 +104,10 @@ perf-smoke-py: py-dev-release
perf-smoke: perf-smoke-rust perf-smoke-py
+py-coverage: py-dev  # py-dev (maturin develop) runs first; tests/benchmarks is excluded from the run
+ @command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
+ cd atompack-py && mkdir -p coverage && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked --with pytest-cov pytest tests --ignore=tests/benchmarks --cov=atompack --cov-report=term-missing --cov-report=xml:coverage/python-coverage.xml --cov-report=html:coverage/htmlcov
+
docs-sync:
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --project atompack-py --group docs --locked
@@ -121,6 +133,8 @@ lint: rust-lint py-lint
test: rust-test py-test
+coverage: rust-coverage py-coverage
+
ci-rust: rust-fmt-check rust-lint rust-test
ci-py: py-fmt-check py-lint py-test
diff --git a/README.md b/README.md
index e7af74e..fcb9b90 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,11 @@
# Atompack
+
+
+
+
+
+
Append-only molecule storage for atomistic ML datasets.
Atompack is a Python package plus Rust core crate for writing, reading, and distributing molecular
diff --git a/atompack-py/python/atompack/__init__.pyi b/atompack-py/python/atompack/__init__.pyi
index 67dddae..fd7b12f 100644
--- a/atompack-py/python/atompack/__init__.pyi
+++ b/atompack-py/python/atompack/__init__.pyi
@@ -534,7 +534,7 @@ class Database:
Parameters
----------
index : int
- Molecule index (0-based)
+ Molecule index (0-based). Negative indices count from the end (-1 is the last molecule).
Returns
-------
@@ -549,7 +549,7 @@ class Database:
Parameters
----------
indices : list of int
- Molecule indices (0-based)
+ Molecule indices (0-based). Negative indices count from the end (-1 is the last molecule).
Returns
-------
@@ -608,7 +608,7 @@ class Database:
Parameters
----------
index : int
- Molecule index (0-based)
+ Molecule index (0-based). Negative indices count from the end (-1 is the last molecule).
Returns
-------
diff --git a/atompack-py/python/atompack/_atompack_rs.pyi b/atompack-py/python/atompack/_atompack_rs.pyi
index 99e8730..3b4c284 100644
--- a/atompack-py/python/atompack/_atompack_rs.pyi
+++ b/atompack-py/python/atompack/_atompack_rs.pyi
@@ -475,7 +475,7 @@ class PyAtomDatabase:
Parameters
----------
index : int
- Molecule index (0-based)
+ Molecule index (0-based). Negative indices count from the end (-1 is the last molecule).
Returns
-------
@@ -491,7 +491,7 @@ class PyAtomDatabase:
Parameters
----------
indices : sequence of int
- Molecule indices (0-based)
+ Molecule indices (0-based). Negative indices count from the end (-1 is the last molecule).
Returns
-------
diff --git a/atompack-py/src/database.rs b/atompack-py/src/database.rs
index f472cc9..0da931f 100644
--- a/atompack-py/src/database.rs
+++ b/atompack-py/src/database.rs
@@ -11,6 +11,31 @@ pub(crate) struct PyAtomDatabase {
}
impl PyAtomDatabase {
+    /// Resolve a Python-style index (negatives count from the end) to an in-bounds offset.
+    /// Raises `IndexError` when the resolved position is outside `0..len`.
+    fn normalize_index(&self, index: isize) -> PyResult<usize> {
+        let len = self.inner.len();
+        // Stay in `usize`: `unsigned_abs` is exact even for `isize::MIN`, so no lossy casts.
+        let resolved = if index < 0 {
+            len.checked_sub(index.unsigned_abs())
+        } else {
+            Some(index as usize).filter(|&i| i < len)
+        };
+        resolved.ok_or_else(|| {
+            PyIndexError::new_err(format!(
+                "Index {} out of bounds for database of length {}",
+                index, len
+            ))
+        })
+    }
+
+    fn normalize_indices(&self, indices: Vec<isize>) -> PyResult<Vec<usize>> {
+        indices
+            .into_iter()
+            .map(|index| self.normalize_index(index)) // negative entries wrap from the end
+            .collect() // collecting into `PyResult` stops at the first `IndexError`
+    }
+
fn single_molecule_view(&self, py: Python<'_>, index: usize) -> PyResult {
let compression = self.inner.compression();
let use_mmap = self.inner.get_compressed_slice(0).is_some();
@@ -211,15 +236,11 @@ impl PyAtomDatabase {
}
/// Get a molecule by index as a lazy view-backed molecule.
-    fn get_molecule(&self, py: Python<'_>, index: usize) -> PyResult<PyMolecule> {
-        let len = self.inner.len();
-        if index >= len {
-            return Err(PyIndexError::new_err(format!(
-                "Index {} out of bounds for database of length {}",
-                index, len
-            )));
-        }
-        Ok(PyMolecule::from_view(self.single_molecule_view(py, index)?))
+    fn get_molecule(&self, py: Python<'_>, index: isize) -> PyResult<PyMolecule> {
+        // Accept negative (from-the-end) indices; out-of-range values raise `IndexError`.
+        let normalized = self.normalize_index(index)?;
+        let view = self.single_molecule_view(py, normalized)?;
+        Ok(PyMolecule::from_view(view))
     }
/// Get multiple molecules by indices (parallel batch reading)
@@ -232,10 +253,11 @@ impl PyAtomDatabase {
///
/// Returns:
/// - List of molecules
- fn get_molecules(&self, py: Python<'_>, indices: Vec) -> PyResult> {
+ fn get_molecules(&self, py: Python<'_>, indices: Vec) -> PyResult> {
if indices.is_empty() {
return Ok(Vec::new());
}
+ let indices = self.normalize_indices(indices)?;
let compression = self.inner.compression();
let use_mmap = self.inner.get_compressed_slice(0).is_some();
@@ -317,8 +339,9 @@ impl PyAtomDatabase {
fn get_molecules_flat<'py>(
&self,
py: Python<'py>,
- indices: Vec,
+ indices: Vec,
) -> PyResult> {
+ let indices = self.normalize_indices(indices)?;
flat::get_molecules_flat_soa_impl(&self.inner, py, indices)
}
@@ -328,7 +351,7 @@ impl PyAtomDatabase {
}
/// Enable indexing: db[i]
-    fn __getitem__(&self, py: Python<'_>, index: usize) -> PyResult<PyMolecule> {
+    fn __getitem__(&self, py: Python<'_>, index: isize) -> PyResult<PyMolecule> {
         self.get_molecule(py, index)
     }
diff --git a/atompack-py/tests/test_database.py b/atompack-py/tests/test_database.py
index 6973049..de42ef1 100644
--- a/atompack-py/tests/test_database.py
+++ b/atompack-py/tests/test_database.py
@@ -248,6 +248,28 @@ def test_database_add_arrays_batch_roundtrip_with_custom_properties(tmp_path: Pa
assert second.get_property("phase") == "valid"
+def test_database_negative_indices_work_across_read_apis(tmp_path: Path) -> None:
+ path = tmp_path / "negative_indices.atp"
+ db = atompack.Database(str(path))
+ db.add_molecules([_make_molecule(-1.0), _make_molecule(-2.0), _make_molecule(-3.0)])
+ db.flush()
+
+ reopened = atompack.Database.open(str(path))
+
+ assert reopened[-1].energy == pytest.approx(-3.0)
+ assert reopened.get_molecule(-2).energy == pytest.approx(-2.0)
+ assert [m.energy for m in reopened.get_molecules([-1, 0, -3])] == pytest.approx(
+ [-3.0, -1.0, -1.0]
+ )
+ np.testing.assert_allclose(
+ reopened.get_molecules_flat([-1, -2])["energy"],
+ np.array([-3.0, -2.0], dtype=np.float64),
+ )
+
+ with pytest.raises(IndexError, match="out of bounds"):
+ reopened.get_molecule(-4)
+
+
@pytest.mark.parametrize("mmap", [False, True])
@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"])
def test_database_single_item_reads_are_view_compatible(
@@ -596,19 +618,15 @@ def test_database_open_mmap_populate(tmp_path: Path) -> None:
assert db_r[0].energy == pytest.approx(-3.0)
-def test_database_negative_indexing_raises_overflow_error(tmp_path: Path) -> None:
- # Database does not support negative indexing today. PyO3 extracts the
- # index argument as `usize`, so a negative integer raises OverflowError
- # at the FFI boundary. If wraparound semantics are ever added, this
- # test will fail loudly so the intent is explicit.
+def test_database_negative_indexing_out_of_bounds_raises_index_error(tmp_path: Path) -> None:
path = tmp_path / "negidx.atp"
db = atompack.Database(str(path))
db.add_molecule(_make_molecule(-1.0))
db.flush()
db_r = atompack.Database.open(str(path))
- with pytest.raises(OverflowError, match=r"negative"):
- _ = db_r[-1]
+ with pytest.raises(IndexError, match=r"out of bounds"):
+ _ = db_r[-2]  # only one molecule is stored, so -2 resolves to a position before index 0
def test_database_empty_molecule_roundtrip(tmp_path: Path) -> None: