Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
push:
pull_request:

# Workflow-level token scope: read-only access to repository contents.
permissions:
  contents: read

# One concurrency group per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
Expand Down Expand Up @@ -54,3 +57,71 @@ jobs:

- name: Run checks
run: make ci-py

# Coverage job: runs the Python and Rust coverage make targets, uploads the
# reports to Codecov, and keeps them as a workflow artifact.
coverage:
  name: Coverage
  runs-on: ubuntu-latest
  # Only for pull requests and for refs/heads/main.
  if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main'
  # Starts only after the rust and python jobs have finished successfully.
  needs: [rust, python]
  permissions:
    contents: read
    # Requested for the OIDC-based Codecov upload used on pushes (use_oidc below).
    id-token: write
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Set up uv
      uses: astral-sh/setup-uv@v7
      with:
        version: "0.9.16"
        enable-cache: true

    - name: Set up Rust
      uses: actions-rust-lang/setup-rust-toolchain@v1
      with:
        toolchain: stable
        # Extra toolchain component installed for the cargo-llvm-cov run below.
        components: llvm-tools-preview

    - name: Cargo cache
      uses: Swatinem/rust-cache@v2

    - name: Install cargo-llvm-cov
      run: cargo install cargo-llvm-cov --locked

    - name: Run Python coverage
      run: make py-coverage

    - name: Run Rust coverage
      run: make rust-coverage

    # Pull requests authenticate with the CODECOV_TOKEN secret.
    - name: Upload to Codecov (pull requests)
      if: github.event_name == 'pull_request'
      uses: codecov/codecov-action@v5
      with:
        files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov
        token: ${{ secrets.CODECOV_TOKEN }}
        fail_ci_if_error: false
        verbose: true

    # Pushes authenticate via OIDC instead of a token.
    - name: Upload to Codecov (pushes)
      if: github.event_name == 'push'
      uses: codecov/codecov-action@v5
      with:
        files: atompack-py/coverage/python-coverage.xml,coverage/rust.lcov
        fail_ci_if_error: false
        use_oidc: true
        verbose: true

    # Both report directories are bundled into a single artifact.
    - name: Upload coverage artifacts
      uses: actions/upload-artifact@v4
      with:
        name: coverage-reports
        path: |
          atompack-py/coverage
          coverage
20 changes: 17 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ ATOMPACK_PERF_COLOR ?= always
override UV_CACHE_DIR := $(CURDIR)/.uv-cache

# Every target listed here is a command name, not a file to build.
# One declaration per task family; multiple .PHONY lines accumulate.
.PHONY: help
.PHONY: rust-fmt rust-fmt-check rust-lint rust-test rust-coverage
.PHONY: py-sync py-fmt py-fmt-check py-lint py-lint-fix py-dev py-dev-release py-test py-test-benchmarks py-coverage
.PHONY: perf-smoke-rust perf-smoke-py perf-smoke
.PHONY: docs-sync docs-build docs
.PHONY: fmt fmt-check lint test coverage
.PHONY: ci-rust ci-py ci

help:
Expand All @@ -33,6 +33,9 @@ help:
@echo " make py-dev uv maturin develop (atompack-py)"
@echo " make py-dev-release uv maturin develop -r (atompack-py)"
@echo " make perf-smoke Run opt-in Rust + Python release throughput smoke tests"
@echo " make py-coverage uv pytest-cov core suite with XML + HTML reports"
@echo " make rust-coverage cargo llvm-cov workspace report in coverage/rust.lcov"
@echo " make coverage Run both Python and Rust coverage targets"
@echo ""
@echo "Docs:"
@echo " make docs-sync Install docs deps (uv, atompack-py docs group)"
Expand All @@ -51,6 +54,11 @@ rust-lint:
# Run `cargo test` across the whole Cargo workspace.
rust-test:
	cargo test --workspace

# Write an LCOV coverage report for the whole Cargo workspace to coverage/rust.lcov.
# Stops early with an install hint when cargo-llvm-cov is not on PATH.
rust-coverage:
	@if ! command -v cargo-llvm-cov >/dev/null 2>&1; then \
		echo "cargo-llvm-cov not found; install with 'cargo install cargo-llvm-cov'"; \
		exit 1; \
	fi
	mkdir -p coverage
	cargo llvm-cov --workspace --lcov --output-path coverage/rust.lcov

py-sync:
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
cd atompack-py && UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --extra dev --locked
Expand Down Expand Up @@ -96,6 +104,10 @@ perf-smoke-py: py-dev-release

# Aggregate target: runs both the Rust and the Python perf smoke targets.
perf-smoke: perf-smoke-rust perf-smoke-py

# Python coverage for the test suite, excluding tests/benchmarks.
# Runs after py-dev; prints missing lines to the terminal and writes XML and
# HTML reports under atompack-py/coverage.
py-coverage: py-dev
	@if ! command -v $(UV) >/dev/null 2>&1; then \
		echo "uv not found; install from https://docs.astral.sh/uv/"; \
		exit 1; \
	fi
	cd atompack-py && \
		mkdir -p coverage && \
		UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) run --extra dev --locked --with pytest-cov \
			pytest tests \
			--ignore=tests/benchmarks \
			--cov=atompack \
			--cov-report=term-missing \
			--cov-report=xml:coverage/python-coverage.xml \
			--cov-report=html:coverage/htmlcov

docs-sync:
@command -v $(UV) >/dev/null 2>&1 || (echo "uv not found; install from https://docs.astral.sh/uv/" && exit 1)
UV_CACHE_DIR=$(UV_CACHE_DIR) $(UV) sync --project atompack-py --group docs --locked
Expand All @@ -121,6 +133,8 @@ lint: rust-lint py-lint

# Aggregate target: Rust and Python test targets.
test: rust-test py-test

# Aggregate target: Rust and Python coverage targets.
coverage: rust-coverage py-coverage

# Rust CI bundle: format check, lint, then tests.
ci-rust: rust-fmt-check rust-lint rust-test

# Python CI bundle: format check, lint, then tests.
ci-py: py-fmt-check py-lint py-test
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Atompack

<p align="center">
<a href="https://codecov.io/gh/LeMaterial/atompack">
<img alt="Codecov" src="https://codecov.io/gh/LeMaterial/atompack/branch/main/graph/badge.svg">
</a>
</p>

Append-only molecule storage for atomistic ML datasets.

Atompack is a Python package plus Rust core crate for writing, reading, and distributing molecular
Expand Down
23 changes: 23 additions & 0 deletions atompack-py/python/atompack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,35 @@
`Database.open(path, mmap=False)` if you want to append molecules.
"""

from typing import Any, Iterator

from . import hub
from ._atompack_rs import PyAtom as Atom
from ._atompack_rs import PyAtomDatabase as Database
from ._atompack_rs import PyMolecule as Molecule
from .ase_bridge import add_ase_batch, from_ase, to_ase, to_ase_batch


def _database_iter_batches(
    database: Database,
    batch_size: int,
    *,
    flat: bool = False,
    drop_last: bool = False,
) -> Iterator[list[Molecule] | dict[str, Any]]:
    """Yield consecutive, index-ordered batches of molecules from ``database``.

    Args:
        database: Object to read from. It must support ``len()`` and provide
            the ``get_molecules`` / ``get_molecules_flat`` batch readers.
        batch_size: Maximum number of molecules per batch; must be positive.
        flat: When true, each batch is the result of ``get_molecules_flat``
            for the batch indices instead of the ``get_molecules`` result.
        drop_last: When true, a final batch holding fewer than ``batch_size``
            molecules is skipped instead of yielded.

    Yields:
        One reader result per batch, covering indices ``0 .. len - 1`` in order.

    Raises:
        ValueError: If ``batch_size`` is not positive. This is a generator, so
            the check runs when iteration starts, not when it is called.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    getter = database.get_molecules_flat if flat else database.get_molecules

    # Read the length once so every batch boundary comes from the same
    # snapshot; re-reading it per batch could disagree with the range bound
    # if the database grows while the caller is still iterating.
    total = len(database)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        if drop_last and stop - start < batch_size:
            # Only the final batch can be short, so nothing follows it.
            break
        yield getter(list(range(start, stop)))


# Attach the pure-Python batching helper to the Database class imported from
# ``_atompack_rs`` so callers can write ``db.iter_batches(...)``.
Database.iter_batches = _database_iter_batches

__version__ = "0.2.1"
__all__ = [
"Atom",
Expand Down
16 changes: 15 additions & 1 deletion atompack-py/python/atompack/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Type stubs for atompack"""

from typing import Any, Sequence, overload
from typing import Any, Iterator, Sequence, overload

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -565,6 +565,20 @@ class Database:
``properties`` and ``atom_properties`` dictionaries when present.
"""
...
def iter_batches(
    self,
    batch_size: int,
    *,
    flat: bool = False,
    drop_last: bool = False,
) -> Iterator[list[Molecule] | dict[str, Any]]:
    """
    Yield contiguous batches from the database.

    Batches cover the database in index order, ``batch_size`` molecules at
    a time; the final batch may be shorter.

    Set ``flat=True`` to yield ``get_molecules_flat`` payloads instead of
    materialized Molecule objects.

    Set ``drop_last=True`` to skip a final batch that holds fewer than
    ``batch_size`` molecules.

    Raises ``ValueError`` if ``batch_size`` is not positive; the check runs
    when iteration starts.
    """
    ...
def to_ase_batch(
self,
indices: list[int] | None = None,
Expand Down
31 changes: 31 additions & 0 deletions atompack-py/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,37 @@ def test_database_add_arrays_batch_roundtrip_with_custom_properties(tmp_path: Pa
assert second.get_property("phase") == "valid"


def test_database_iter_batches_supports_object_and_flat_batches(tmp_path: Path) -> None:
    """iter_batches yields ordered object and flat batches and rejects batch_size=0."""
    db_path = str(tmp_path / "iter_batches.atp")
    values = [-1.0, -2.0, -3.0, -4.0, -5.0]

    writer = atompack.Database(db_path)
    writer.add_molecules([_make_molecule(value) for value in values])
    writer.flush()

    reader = atompack.Database.open(db_path)

    # Default mode: batches of Molecule objects, trailing short batch kept.
    energies_per_batch = [
        [molecule.energy for molecule in batch] for batch in reader.iter_batches(2)
    ]
    assert energies_per_batch == [[-1.0, -2.0], [-3.0, -4.0], [-5.0]]

    # Flat mode with drop_last: the trailing single-item batch is discarded.
    flat_batches = list(reader.iter_batches(2, flat=True, drop_last=True))
    assert len(flat_batches) == 2
    for payload, expected in zip(flat_batches, ([-1.0, -2.0], [-3.0, -4.0])):
        np.testing.assert_allclose(payload["energy"], np.array(expected))

    # A non-positive batch size is rejected once iteration is attempted.
    with pytest.raises(ValueError, match="positive"):
        list(reader.iter_batches(0))


@pytest.mark.parametrize("mmap", [False, True])
@pytest.mark.parametrize("compression", ["none", "lz4", "zstd"])
def test_database_single_item_reads_are_view_compatible(
Expand Down
2 changes: 1 addition & 1 deletion atompack-py/tests/test_stub_surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_private_stub_tracks_low_level_surface() -> None:

def test_public_stub_exposes_flat_batch_reader() -> None:
    """The public ``Database`` stub must declare both batch-reading methods."""
    required = {"get_molecules_flat", "iter_batches"}
    declared = _class_method_names(PUBLIC_STUB, "Database")
    assert required <= declared


def test_hub_stub_has_public_docstrings() -> None:
Expand Down
Loading