From a392639775f01fed65ac168a317f72a1db73955b Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 11 Jun 2026 01:04:26 -0400 Subject: [PATCH 1/3] fix(axis): index bounds, empty-sequence index, transform validation, and __dir__ Part of #1143: - B8b: Axis.__getitem__ no longer wraps ax[-size-1] into the underflow bin; out-of-range negative indices now raise IndexError, and the error message reports the requested index and the axis size. - B9: _isstr no longer treats empty sequences/arrays as strings, so numerical axes accept .index([]). StrCategory still accepts empty sized iterables, and one-shot iterators are no longer consumed by the check. - B14c: passing a transform class (e.g. transform.Pow) or a non-AxisTransform object (e.g. 'log') now raises a clear TypeError instead of relying on direct-base introspection or failing with AttributeError. - B15: ArrayTuple.__dir__ now lists ndarray attributes instead of calling dir() on a string literal. Assisted-by: ClaudeCode:claude-fable-5 --- src/boost_histogram/axis/__init__.py | 50 +++++++++++++++++++------- tests/test_axes_object.py | 12 +++++++ tests/test_axis.py | 54 ++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 15 deletions(-) diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index f7435413..19cf5f57 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -43,16 +43,35 @@ def __dir__() -> list[str]: def _isstr(value: Any) -> bool: """ - Check to see if this is a stringlike or a (nested) iterable of stringlikes + Check to see if this is a stringlike or a non-empty (nested) iterable of + stringlikes. Empty iterables are not considered stringlike. Iterables + without a length (one-shot iterators) are not inspected, since checking + them would consume them. """ if isinstance(value, (str, bytes)): return True - if hasattr(value, "__iter__"): - return all(_isstr(v) for v in value) + if isinstance(value, np.ndarray): + if value.dtype.kind in {"S", "U"}: + return True + if value.dtype.kind == "O": + return value.size > 0 and all(_isstr(v) for v in value.flat) + return False + if hasattr(value, "__iter__") and hasattr(value, "__len__"): + return len(value) > 0 and all(_isstr(v) for v in value) return False +def _is_empty_sized(value: Any) -> bool: + """ + Check for a sized, empty iterable (without consuming one-shot iterators). + """ + try: + return len(value) == 0 + except TypeError: + return False + + def _opts(**kwargs: bool) -> set[str]: return {k for k, v in kwargs.items() if v} @@ -274,11 +293,13 @@ def __getitem__(self, i: AxCallOrInt) -> int | str | tuple[float, float]: if callable(i): i = i(self) else: + size: int = self._ax.size + requested = i if i < 0: - i += self._ax.size - if i >= self._ax.size: + i += size + if i < 0 or i >= size: raise IndexError( - f"Out of range access, {i} is more than {self._ax.size}" + f"Out of range access, {requested} is out of range for axis with size {size}" ) assert not callable(i) return self.bin(i) @@ -371,11 +392,14 @@ def __init__( if options != {"underflow", "overflow"}: raise KeyError("Transform supplied, cannot change other options") - if ( - not isinstance(transform, AxisTransform) - and AxisTransform in transform.__bases__ # type: ignore[unreachable] - ): - raise TypeError(f"You must pass an instance, use {transform}()") + if not isinstance(transform, AxisTransform): + if isinstance(transform, type) and issubclass( # type: ignore[unreachable] + transform, AxisTransform + ): + msg = f"You must pass an instance, use {transform.__name__}()" + raise TypeError(msg) + msg = f"transform must be an AxisTransform instance, got {transform!r}" + raise TypeError(msg) ax = transform._produce(bins, start, stop) @@ -695,7 +719,7 @@ def index(self, value: float | str) -> int: Return the fractional index(es) given a value (or values) on the axis. """ - if _isstr(value): + if _isstr(value) or _is_empty_sized(value): return self._ax.index(value) # type: ignore[no-any-return] msg = f"index({value}) must be a string or iterable of strings for a StrCategory axis" @@ -823,7 +847,7 @@ def __getattr__(self, name: str) -> Any: return self.__class__(getattr(a, name) for a in self) def __dir__(self) -> list[str]: - names = dir(self.__class__) + dir("np.typing.NDArray[Any]") + names = set(dir(self.__class__)) | set(dir(np.ndarray)) | self._REDUCTIONS return sorted(n for n in names if not n.startswith("_")) def __call__(self, *args: Any, **kwargs: Any) -> Any: diff --git a/tests/test_axes_object.py b/tests/test_axes_object.py index 9d02160b..3f8388a3 100644 --- a/tests/test_axes_object.py +++ b/tests/test_axes_object.py @@ -82,3 +82,15 @@ def test_axis_misconstuct(): with pytest.raises(TypeError): bh.axis.AxesTuple(inp[0]) + + +def test_array_tuple_dir(h): + # Issue #1143 (B15): __dir__ used dir() of a string literal + listing = dir(h.axes.centers) + + assert "sum" in listing + assert "broadcast" in listing + # ndarray attributes are forwarded, so they should be listed + assert "shape" in listing + assert "T" in listing + assert "upper" not in listing # no str methods diff --git a/tests/test_axis.py b/tests/test_axis.py index 7827ce06..582e177d 100644 --- a/tests/test_axis.py +++ b/tests/test_axis.py @@ -167,11 +167,10 @@ def test_init(self): with pytest.raises(TypeError): bh.axis.Regular(1, 1.0, 2.0, bad_keyword="ra") - with pytest.raises(AttributeError): + with pytest.raises(TypeError): bh.axis.Regular(1, 1.0, 2.0, transform=lambda _: 2) with pytest.raises(TypeError): bh.axis.Regular(1, 1.0, 2.0, transform=bh.axis.transform.Pow) - # TODO: These errors could be better def test_traits(self): STD_TRAITS = {"continuous": True, "ordered": True} @@ -910,3 +909,54 @@ def test_edges_centers_widths(self): assert_allclose(a.edges, [0.0, 1.0, 2.0]) assert_allclose(a.centers, [0.5, 1.5]) assert_allclose(a.widths, [1, 1]) + + +# Issue #1143 regression tests + + +def test_getitem_negative_wraparound(): + # Issue #1143 (B8b): ax[-size-1] used to wrap into the underflow bin + ax = bh.axis.Regular(10, 0, 1) + + assert ax[-10] == ax[0] + assert ax[-1] == ax[9] + + with pytest.raises(IndexError, match="-11"): + ax[-11] + with pytest.raises(IndexError, match="10"): + ax[10] + + +def test_index_empty_sequence(): + # Issue #1143 (B9): empty sequences are not strings + assert bh.axis.Regular(10, 0, 1).index([]).size == 0 + assert bh.axis.Regular(10, 0, 1).index(np.array([])).size == 0 + assert bh.axis.Integer(0, 5).index(()).size == 0 + + # Still rejects actual strings + with pytest.raises(TypeError): + bh.axis.Regular(10, 0, 1).index("hi") + with pytest.raises(TypeError): + bh.axis.Regular(10, 0, 1).index(["hi", "ho"]) + + # StrCategory still accepts empty and string input + ax = bh.axis.StrCategory(["a", "b"]) + assert ax.index([]).size == 0 + assert ax.index(np.array([])).size == 0 + assert ax.index("a") == 0 + assert_array_equal(ax.index(["b", "a"]), [1, 0]) + + with pytest.raises(TypeError): + ax.index([1, 2]) + + +def test_transform_validation(): + # Issue #1143 (B14c): clear TypeErrors for invalid transform arguments + with pytest.raises(TypeError, match=r"use Pow\(\)"): + bh.axis.Regular(10, 1, 100, transform=bh.axis.transform.Pow) + + with pytest.raises(TypeError, match="AxisTransform"): + bh.axis.Regular(10, 1, 100, transform="log") + + with pytest.raises(TypeError, match="AxisTransform"): + bh.axis.Regular(10, 1, 100, transform=42) From 2f963d5a92ee0e0479fad30c0666e3fc4e0d9a73 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 11 Jun 2026 01:04:59 -0400 Subject: [PATCH 2/3] fix: surface the "did you forget to compile" hint when _core is missing Part of #1143 (B10): the guarded _core import in histogram.py was dead code, since boost_histogram/__init__.py imported submodules (which import _core directly) before histogram.py ever ran, so users only saw a bare ModuleNotFoundError. Move the guard into the package __init__, before any submodule import, and drop the redundant guard in histogram.py (which keeps its plain import). Also pass the combined message (original error plus hint) on Python 3.10, as originally intended. Assisted-by: ClaudeCode:claude-fable-5 --- src/boost_histogram/__init__.py | 18 ++++++++++++++++++ src/boost_histogram/histogram.py | 17 ----------------- tests/test_histogram.py | 26 ++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/src/boost_histogram/__init__.py b/src/boost_histogram/__init__.py index 8747d591..c5d8103e 100644 --- a/src/boost_histogram/__init__.py +++ b/src/boost_histogram/__init__.py @@ -1,5 +1,23 @@ from __future__ import annotations +import sys + +try: + from . import _core # noqa: F401 +except ImportError as err: + if "_core" not in str(err): + raise + + new_msg = "Did you forget to compile boost-histogram? Use CMake or scikit-build-core to build, see the readme." + + if sys.version_info >= (3, 11): + err.add_note(new_msg) + raise + + total_msg = f"{err}\n{new_msg}" + new_exception = type(err)(total_msg, name=err.name, path=err.path) + raise new_exception from err + from . import accumulators, axis, numpy, storage from .histogram import Histogram, IndexingExpr, Kind from .tag import ( # pylint: disable=redefined-builtin diff --git a/src/boost_histogram/histogram.py b/src/boost_histogram/histogram.py index 1067e51e..4fcd9e12 100644 --- a/src/boost_histogram/histogram.py +++ b/src/boost_histogram/histogram.py @@ -4,7 +4,6 @@ import copy import enum import logging -import sys import threading import typing import warnings @@ -47,22 +46,6 @@ WeightedSum, ) -try: - from . import _core -except ImportError as err: - if "_core" not in str(err): - raise - - new_msg = "Did you forget to compile boost-histogram? Use CMake or scikit-build-core to build, see the readme." - - if sys.version_info >= (3, 11): - err.add_note(new_msg) - raise - - total_msg = f"{err}\n{new_msg}" - new_exception = type(err)(new_msg, name=err.name, path=err.path) - raise new_exception from err - # This is a StrEnum as defined in Python 3.11 class Kind(str, enum.Enum): diff --git a/tests/test_histogram.py b/tests/test_histogram.py index ff8bea9e..9d9690b7 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -5,6 +5,7 @@ import operator import pickle import platform +import subprocess import sys import threading from collections import OrderedDict @@ -1719,3 +1720,28 @@ def test_allclose_different_continuous_traits(): h2 = bh.Histogram(bh.axis.Integer(0, 3)) assert not h1.allclose(h2) + + +@pytest.mark.skipif(sys.platform.startswith("emscripten"), reason="needs subprocess") +def test_missing_core_hint(): + # Issue #1143 (B10): if the compiled _core module is missing, the import + # error must include the "did you forget to compile" hint. This only works + # if boost_histogram/__init__.py imports _core before any submodule does. + code = """ +import sys +import importlib.abc + +class Blocker(importlib.abc.MetaPathFinder): + def find_spec(self, name, path=None, target=None): + if name == "boost_histogram._core": + raise ModuleNotFoundError(f"No module named {name!r}", name=name) + return None + +sys.meta_path.insert(0, Blocker()) +import boost_histogram +""" + result = subprocess.run( + [sys.executable, "-c", code], capture_output=True, text=True, check=False + ) + assert result.returncode != 0 + assert "Did you forget to compile boost-histogram?" in result.stderr From d231f8acd7dfcf35b08d6d719847984bc531f600 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 11 Jun 2026 01:05:09 -0400 Subject: [PATCH 3/3] fix(hist): __setitem__ flow transfer, threaded scalar fill, index bounds, and clearer errors Part of #1143: - B1: setting from a Histogram (h[...] = h2) now keeps the value's flow bins; np.asarray(value) called __array__, which silently dropped them so the expanded-setting branch could never match for histogram values. - B5: threaded fill no longer crashes on scalar positional arguments or 0-d array weights/samples; scalars are broadcast against each thread's chunk, and an all-scalar fill is performed exactly once instead of once per thread. - B8a: integer indexing accepts -size (bin 0); the bounds check used abs(), wrongly rejecting it. - B14a: a plain string index (h['a']) now raises a clear IndexError pointing at the locator protocol instead of being treated as a per-character list of indices. - B14b: invalid storage arguments raise TypeError with an accurate message both for non-storage objects (previously an issubclass() crash) and for uninitialized storage classes (previously a KeyError with an inverted message). Assisted-by: ClaudeCode:claude-fable-5 --- src/boost_histogram/histogram.py | 51 ++++++++++++++++++++++---------- tests/test_histogram.py | 3 +- tests/test_histogram_indexing.py | 25 ++++++++++++++++ tests/test_histogram_init.py | 9 ++++++ tests/test_histogram_set.py | 39 ++++++++++++++++++++++++ tests/test_threaded_fill.py | 36 ++++++++++++++++++++++ 6 files changed, 146 insertions(+), 17 deletions(-) diff --git a/src/boost_histogram/histogram.py b/src/boost_histogram/histogram.py index 4fcd9e12..528b222b 100644 --- a/src/boost_histogram/histogram.py +++ b/src/boost_histogram/histogram.py @@ -359,13 +359,13 @@ def __init__( # Check for missed parenthesis or incorrect types if not isinstance(resolved_storage, Storage): - msg_storage = ( # type: ignore[unreachable] - "Passing in an initialized storage has been removed. Please add ()." - ) - msg_unknown = "Only storages allowed in storage argument" - raise KeyError( - msg_storage if issubclass(resolved_storage, Storage) else msg_unknown - ) + if isinstance(resolved_storage, type) and issubclass( # type: ignore[unreachable] + resolved_storage, Storage + ): + msg = f"Storages need to be initialized; use {resolved_storage.__name__}() instead. Please add ()." + raise TypeError(msg) + msg = f"Only storages allowed in storage argument, got {resolved_storage!r}" + raise TypeError(msg) # Allow a tuple to represent a regular axis axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type] @@ -893,22 +893,33 @@ def fill( }: raise RuntimeError("Mean histograms do not support threaded filling") - data: list[list[np.typing.NDArray[Any]] | list[str]] = [] + # If everything is scalar, there is only a single fill; threading would + # incorrectly repeat it, so fill directly instead. + if ( + all(isinstance(a, str) or np.ndim(a) == 0 for a in args_ars) + and (weight_ars is None or np.ndim(weight_ars) == 0) + and (sample_ars is None or np.ndim(sample_ars) == 0) + ): + self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) + return self + + data: list[list[Any]] = [] for a in args_ars: - if isinstance(a, str): + if isinstance(a, str) or np.ndim(a) == 0: + # Scalars broadcast against each thread's chunk data.append([a] * threads) else: - data.append(np.array_split(np.asarray(a), threads)) + data.append(list(np.array_split(np.asarray(a), threads))) weights: list[Any] - if weight is None or np.isscalar(weight): + if weight_ars is None or np.ndim(weight_ars) == 0: assert threads is not None weights = [weight_ars] * threads else: weights = np.array_split(np.asarray(weight_ars), threads) samples: list[Any] - if sample_ars is None or np.isscalar(sample_ars): + if sample_ars is None or np.ndim(sample_ars) == 0: assert threads is not None samples = [sample_ars] * threads else: @@ -1080,9 +1091,11 @@ def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: raise TypeError(f"Index {index} must be an integer, not float") if isinstance(index, SupportsIndex): - if abs(int(index)) >= self._hist.axis(axis).size: + idx = int(index) + size: int = self._hist.axis(axis).size + if not -size <= idx < size: raise IndexError("histogram index is out of range") - return int(index) % self._hist.axis(axis).size + return idx % size return index @@ -1345,7 +1358,9 @@ def __getitem__( pick_each[i] = ind.__index__() + ( 1 if self.axes[i].traits.underflow else 0 ) - case collections.abc.Sequence(): + # str/bytes are Sequences but not valid indices; they fall + # through to the IndexError below. + case collections.abc.Sequence() if not isinstance(ind, (str, bytes)): # type: ignore[unreachable] pick_set[i] = list(ind) case slice(start=start, stop=stop, step=step): reduced, new_slices, new_integrations = self._handle_slice( @@ -1587,7 +1602,11 @@ def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> No self.view()[self._compute_vectorized_index(indexes)] = np.asarray(value) return - in_array = np.asarray(value) + # A Histogram value must keep its flow bins; np.asarray() would call + # __array__, which drops them (returns view(flow=False)). + in_array = ( + value.view(flow=True) if isinstance(value, Histogram) else np.asarray(value) + ) view: Any = self.view(flow=True) value_shape: tuple[int, ...] diff --git a/tests/test_histogram.py b/tests/test_histogram.py index 9d9690b7..2084cffd 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -112,11 +112,12 @@ def test_fill_int_1d(): assert h[bh.underflow + 1] == 2 assert h[-1] == 3 + assert h[-3] == h[0] with pytest.raises(IndexError): h[3] with pytest.raises(IndexError): - h[-3] + h[-4] def test_fill_int_with_float_single_1d(): diff --git a/tests/test_histogram_indexing.py b/tests/test_histogram_indexing.py index 712ae71e..8d0003bb 100644 --- a/tests/test_histogram_indexing.py +++ b/tests/test_histogram_indexing.py @@ -693,3 +693,28 @@ def test_vectorized_get_rejects_unsupported(): # A categorical pick list cannot be combined with array indexing with pytest.raises(IndexError, match="integer arrays"): h[[0, 1], arr] + + +def test_negative_size_index(): + # Issue #1143 (B8a): -size is a valid Python index for bin 0 + h = bh.Histogram(bh.axis.Regular(10, 0, 1)) + h[0] = 7 + + assert h[-10] == 7 + assert h[-10] == h[0] + + with pytest.raises(IndexError): + h[-11] + with pytest.raises(IndexError): + h[10] + + +def test_string_index_raises(): + # Issue #1143 (B14a): a plain string is not a valid index (use bh.loc) + h = bh.Histogram(bh.axis.StrCategory(["a", "b"])) + h.fill(["a", "a", "b"]) + + with pytest.raises(IndexError, match="locator protocol"): + h["a"] + + assert h[bh.loc("a")] == 2 diff --git a/tests/test_histogram_init.py b/tests/test_histogram_init.py index ab90247e..eaa47690 100644 --- a/tests/test_histogram_init.py +++ b/tests/test_histogram_init.py @@ -134,3 +134,12 @@ def test_make_selection(ax, storage): histogram = bh.Histogram(ax, ax, storage=storage()) assert isinstance(histogram, bh.Histogram) # TODO: Make this test do something useful + + +def test_init_bad_storage(): + # Issue #1143 (B14b): clear TypeErrors for invalid storage arguments + with pytest.raises(TypeError, match="Only storages allowed"): + bh.Histogram(bh.axis.Regular(10, 0, 1), storage=42) + + with pytest.raises(TypeError, match=r"use Double\(\)"): + bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.Double) diff --git a/tests/test_histogram_set.py b/tests/test_histogram_set.py index 56702d31..f798ca7d 100644 --- a/tests/test_histogram_set.py +++ b/tests/test_histogram_set.py @@ -202,3 +202,42 @@ def test_vectorized_set_multicell(): i1 = np.array([0, 3]) h[i0, i1] = 7.0 np.testing.assert_array_equal(h.view()[:, i0, i1], 7.0) + + +def test_set_histogram_with_flow(): + # Issue #1143 (B1): np.asarray(value) dropped a Histogram's flow bins + h = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2 = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2.view(flow=True)[:] = [100, 1, 2, 3, 200] + + h[:] = h2 + assert_array_equal(h.view(flow=True), [100, 1, 2, 3, 200]) + + +def test_set_histogram_with_flow_2d(): + h = bh.Histogram(bh.axis.Regular(2, 0, 1), bh.axis.Regular(2, 0, 1)) + h2 = bh.Histogram(bh.axis.Regular(2, 0, 1), bh.axis.Regular(2, 0, 1)) + h2.view(flow=True)[...] = np.arange(16).reshape(4, 4) + + h[:, :] = h2 + assert_array_equal(h.view(flow=True), np.arange(16).reshape(4, 4)) + + +def test_set_histogram_without_flow(): + # A flow-less histogram value still sets the inner bins only + h = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h3 = bh.Histogram(bh.axis.Regular(3, 0, 1, underflow=False, overflow=False)) + h3[:] = [1, 2, 3] + + h[0:3] = h3 + assert_array_equal(h.view(flow=True), [0, 1, 2, 3, 0]) + + +def test_set_histogram_flow_mismatch(): + # Cannot set flow bins of a slice that does not include them + h = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2 = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2.view(flow=True)[:] = [100, 1, 2, 3, 200] + + with pytest.raises(ValueError): + h[0:3] = h2 diff --git a/tests/test_threaded_fill.py b/tests/test_threaded_fill.py index d60841f2..52934607 100644 --- a/tests/test_threaded_fill.py +++ b/tests/test_threaded_fill.py @@ -92,6 +92,42 @@ def test_threaded_weight_storage(threads): assert_almost_equal(hist_1.view().variance, hist_2.view().variance) +@pytest.mark.parametrize("threads", [2, 4, 7], ids=lambda x: f"threads={x}") +def test_threaded_scalar_broadcast(threads): + # Issue #1143 (B5): scalar positional args used to crash np.array_split + hist_1 = bh.Histogram(bh.axis.Regular(10, 0, 1), bh.axis.Regular(10, 0, 1)) + hist_2 = hist_1.copy() + + y = np.random.rand(13) + hist_1.fill(0.5, y) + hist_2.fill(0.5, y, threads=threads) + + assert_array_equal(hist_1.view(flow=True), hist_2.view(flow=True)) + + +@pytest.mark.parametrize("threads", [2, 4, 7], ids=lambda x: f"threads={x}") +def test_threaded_scalar_weight(threads): + # Issue #1143 (B5): 0-d array weights are not np.isscalar but must broadcast + x = np.random.rand(13) + + hist_1 = bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.Weight()) + hist_2 = hist_1.copy() + + hist_1.fill(x, weight=np.array(2.0)) + hist_2.fill(x, weight=np.array(2.0), threads=threads) + + assert_almost_equal(hist_1.view().value, hist_2.view().value) + assert_almost_equal(hist_1.view().variance, hist_2.view().variance) + + +@pytest.mark.parametrize("threads", [2, 4]) +def test_threaded_all_scalar(threads): + # All-scalar fills must fill exactly once, not once per thread + hist = bh.Histogram(bh.axis.Regular(10, 0, 1)) + hist.fill(0.5, threads=threads) + assert hist.sum() == 1 + + def test_no_profile(): hist = bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.Mean()) hist.fill([1, 1], sample=[1, 1])