Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
push:
pull_request:

permissions:
contents: read

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
Expand Down Expand Up @@ -54,3 +57,71 @@ jobs:

- name: Run checks
run: make ci-py

coverage:
name: Coverage
runs-on: ubuntu-latest
if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main'
needs:
- rust
- python
permissions:
contents: read
id-token: write
steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Set up uv
uses: astral-sh/setup-uv@v7
with:
version: "0.9.16"
enable-cache: true

- name: Set up Rust
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: stable
components: llvm-tools-preview

- name: Cargo cache
uses: Swatinem/rust-cache@v2

- name: Install cargo-llvm-cov
run: cargo install cargo-llvm-cov --locked

- name: Run Python coverage
run: make py-coverage

- name: Run Rust coverage
run: make rust-coverage

- name: Upload to Codecov (pull requests)
if: github.event_name == 'pull_request'
uses: codecov/codecov-action@v5
with:
files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false
verbose: true

- name: Upload to Codecov (pushes)
if: github.event_name == 'push'
uses: codecov/codecov-action@v5
with:
files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov
fail_ci_if_error: false
use_oidc: true
verbose: true

- name: Upload coverage artifacts
uses: actions/upload-artifact@v4
with:
name: coverage-reports
path: |
atompack-py/coverage
coverage
20 changes: 17 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ ATOMPACK_PERF_COLOR ?= always
override UV_CACHE_DIR := $(CURDIR)/.uv-cache

.PHONY: help \
rust-fmt rust-fmt-check rust-lint rust-test \
py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks \
rust-fmt rust-fmt-check rust-lint rust-test rust-coverage \
py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks py-coverage \
perf-smoke-rust perf-smoke-py perf-smoke \
docs-sync docs-build docs \
fmt fmt-check lint test \
fmt fmt-check lint test coverage \
ci-rust ci-py ci

help:
Expand All @@ -33,6 +33,9 @@ help:
@echo " make py-dev uv maturin develop (atompack-py)"
@echo " make py-dev-release uv maturin develop -r (atompack-py)"
@echo " make perf-smoke Run opt-in Rust + Python release throughput smoke tests"
@echo " make py-coverage uv pytest-cov core suite with XML + HTML reports"
@echo " make rust-coverage cargo llvm-cov workspace report in coverage/rust.lcov"
@echo " make coverage Run both Python and Rust coverage targets"
@echo ""
@echo "Docs:"
@echo " make docs-sync Install docs deps (uv, atompack-py docs group)"
Expand All @@ -51,6 +54,11 @@ rust-lint:
rust-test:
cargo test --workspace

rust-coverage:
@command -v cargo-llvm-cov >/dev/null 2>&1 || (echo "cargo-llvm-cov not found; install with 'cargo install cargo-llvm-cov'" && exit 1)
mkdir -p coverage
cargo llvm-cov --workspace --lcov --output-path coverage/rust.lcov

py-sync:
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --extra dev --locked
Expand Down Expand Up @@ -96,6 +104,10 @@ perf-smoke-py: py-dev-release

perf-smoke: perf-smoke-rust perf-smoke-py

py-coverage: py-dev
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
cd atompack-py && mkdir -p coverage && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked --with pytest-cov pytest tests --ignore=tests/benchmarks --cov=atompack --cov-report=term-missing --cov-report=xml:coverage/python-coverage.xml --cov-report=html:coverage/htmlcov

docs-sync:
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --project atompack-py --group docs --locked
Expand All @@ -121,6 +133,8 @@ lint: rust-lint py-lint

test: rust-test py-test

coverage: rust-coverage py-coverage

ci-rust: rust-fmt-check rust-lint rust-test

ci-py: py-fmt-check py-lint py-test
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Atompack

<p align="center">
<a href="https://codecov.io/gh/LeMaterial/atompack">
<img alt="Codecov" src="https://codecov.io/gh/LeMaterial/atompack/branch/main/graph/badge.svg">
</a>
</p>

Append-only molecule storage for atomistic ML datasets.

Atompack is a Python package plus Rust core crate for writing, reading, and distributing molecular
Expand Down
6 changes: 3 additions & 3 deletions atompack-py/python/atompack/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ class Database:
Parameters
----------
index : int
Molecule index (0-based)
Molecule index (0-based). Negative indices are supported.

Returns
-------
Expand All @@ -549,7 +549,7 @@ class Database:
Parameters
----------
indices : list of int
Molecule indices (0-based)
Molecule indices (0-based). Negative indices are supported.

Returns
-------
Expand Down Expand Up @@ -608,7 +608,7 @@ class Database:
Parameters
----------
index : int
Molecule index (0-based)
Molecule index (0-based). Negative indices are supported.

Returns
-------
Expand Down
4 changes: 2 additions & 2 deletions atompack-py/python/atompack/_atompack_rs.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ class PyAtomDatabase:
Parameters
----------
index : int
Molecule index (0-based)
Molecule index (0-based). Negative indices are supported.

Returns
-------
Expand All @@ -491,7 +491,7 @@ class PyAtomDatabase:
Parameters
----------
indices : sequence of int
Molecule indices (0-based)
Molecule indices (0-based). Negative indices are supported.

Returns
-------
Expand Down
47 changes: 35 additions & 12 deletions atompack-py/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,31 @@ pub(crate) struct PyAtomDatabase {
}

impl PyAtomDatabase {
fn normalize_index(&self, index: isize) -> PyResult<usize> {
let len = self.inner.len();
let normalized = if index < 0 {
(len as isize)
.checked_add(index)
.ok_or_else(|| PyIndexError::new_err("index underflow"))?
} else {
index
};
if normalized < 0 || normalized >= len as isize {
return Err(PyIndexError::new_err(format!(
"Index {} out of bounds for database of length {}",
index, len
)));
}
Ok(normalized as usize)
}

fn normalize_indices(&self, indices: Vec<isize>) -> PyResult<Vec<usize>> {
indices
.into_iter()
.map(|index| self.normalize_index(index))
.collect()
}

fn single_molecule_view(&self, py: Python<'_>, index: usize) -> PyResult<SoaMoleculeView> {
let compression = self.inner.compression();
let use_mmap = self.inner.get_compressed_slice(0).is_some();
Expand Down Expand Up @@ -211,15 +236,11 @@ impl PyAtomDatabase {
}

/// Get a molecule by index as a lazy view-backed molecule.
fn get_molecule(&self, py: Python<'_>, index: usize) -> PyResult<PyMolecule> {
let len = self.inner.len();
if index >= len {
return Err(PyIndexError::new_err(format!(
"Index {} out of bounds for database of length {}",
index, len
)));
}
Ok(PyMolecule::from_view(self.single_molecule_view(py, index)?))
fn get_molecule(&self, py: Python<'_>, index: isize) -> PyResult<PyMolecule> {
let normalized = self.normalize_index(index)?;
Ok(PyMolecule::from_view(
self.single_molecule_view(py, normalized)?,
))
}

/// Get multiple molecules by indices (parallel batch reading)
Expand All @@ -232,10 +253,11 @@ impl PyAtomDatabase {
///
/// Returns:
/// - List of molecules
fn get_molecules(&self, py: Python<'_>, indices: Vec<usize>) -> PyResult<Vec<PyMolecule>> {
fn get_molecules(&self, py: Python<'_>, indices: Vec<isize>) -> PyResult<Vec<PyMolecule>> {
if indices.is_empty() {
return Ok(Vec::new());
}
let indices = self.normalize_indices(indices)?;

let compression = self.inner.compression();
let use_mmap = self.inner.get_compressed_slice(0).is_some();
Expand Down Expand Up @@ -317,8 +339,9 @@ impl PyAtomDatabase {
fn get_molecules_flat<'py>(
&self,
py: Python<'py>,
indices: Vec<usize>,
indices: Vec<isize>,
) -> PyResult<Bound<'py, PyDict>> {
let indices = self.normalize_indices(indices)?;
flat::get_molecules_flat_soa_impl(&self.inner, py, indices)
}

Expand All @@ -328,7 +351,7 @@ impl PyAtomDatabase {
}

/// Enable indexing: db[i]
fn __getitem__(&self, py: Python<'_>, index: usize) -> PyResult<PyMolecule> {
fn __getitem__(&self, py: Python<'_>, index: isize) -> PyResult<PyMolecule> {
self.get_molecule(py, index)
}

Expand Down
32 changes: 25 additions & 7 deletions atompack-py/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,28 @@ def test_database_add_arrays_batch_roundtrip_with_custom_properties(tmp_path: Pa
assert second.get_property("phase") == "valid"


def test_database_negative_indices_work_across_read_apis(tmp_path: Path) -> None:
path = tmp_path / "negative_indices.atp"
db = atompack.Database(str(path))
db.add_molecules([_make_molecule(-1.0), _make_molecule(-2.0), _make_molecule(-3.0)])
db.flush()

reopened = atompack.Database.open(str(path))

assert reopened[-1].energy == pytest.approx(-3.0)
assert reopened.get_molecule(-2).energy == pytest.approx(-2.0)
assert [m.energy for m in reopened.get_molecules([-1, 0, -3])] == pytest.approx(
[-3.0, -1.0, -1.0]
)
np.testing.assert_allclose(
reopened.get_molecules_flat([-1, -2])["energy"],
np.array([-3.0, -2.0], dtype=np.float64),
)

with pytest.raises(IndexError, match="out of bounds"):
reopened.get_molecule(-4)


@pytest.mark.parametrize("mmap", [False, True])
@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"])
def test_database_single_item_reads_are_view_compatible(
Expand Down Expand Up @@ -596,19 +618,15 @@ def test_database_open_mmap_populate(tmp_path: Path) -> None:
assert db_r[0].energy == pytest.approx(-3.0)


def test_database_negative_indexing_raises_overflow_error(tmp_path: Path) -> None:
# Database does not support negative indexing today. PyO3 extracts the
# index argument as `usize`, so a negative integer raises OverflowError
# at the FFI boundary. If wraparound semantics are ever added, this
# test will fail loudly so the intent is explicit.
def test_database_negative_indexing_out_of_bounds_raises_index_error(tmp_path: Path) -> None:
path = tmp_path / "negidx.atp"
db = atompack.Database(str(path))
db.add_molecule(_make_molecule(-1.0))
db.flush()

db_r = atompack.Database.open(str(path))
with pytest.raises(OverflowError, match=r"negative"):
_ = db_r[-1]
with pytest.raises(IndexError, match=r"out of bounds"):
_ = db_r[-2]


def test_database_empty_molecule_roundtrip(tmp_path: Path) -> None:
Expand Down
Loading