diff --git a/src/boost_histogram/__init__.py b/src/boost_histogram/__init__.py index 8747d591..c5d8103e 100644 --- a/src/boost_histogram/__init__.py +++ b/src/boost_histogram/__init__.py @@ -1,5 +1,23 @@ from __future__ import annotations +import sys + +try: + from . import _core # noqa: F401 +except ImportError as err: + if "_core" not in str(err): + raise + + new_msg = "Did you forget to compile boost-histogram? Use CMake or scikit-build-core to build, see the readme." + + if sys.version_info >= (3, 11): + err.add_note(new_msg) + raise + + total_msg = f"{err}\n{new_msg}" + new_exception = type(err)(total_msg, name=err.name, path=err.path) + raise new_exception from err + from . import accumulators, axis, numpy, storage from .histogram import Histogram, IndexingExpr, Kind from .tag import ( # pylint: disable=redefined-builtin diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index f7435413..19cf5f57 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -43,16 +43,35 @@ def __dir__() -> list[str]: def _isstr(value: Any) -> bool: """ - Check to see if this is a stringlike or a (nested) iterable of stringlikes + Check to see if this is a stringlike or a non-empty (nested) iterable of + stringlikes. Empty iterables are not considered stringlike. Iterables + without a length (one-shot iterators) are not inspected, since checking + them would consume them. """ if isinstance(value, (str, bytes)): return True - if hasattr(value, "__iter__"): - return all(_isstr(v) for v in value) + if isinstance(value, np.ndarray): + if value.dtype.kind in {"S", "U"}: + return True + if value.dtype.kind == "O": + return value.size > 0 and all(_isstr(v) for v in value.flat) + return False + if hasattr(value, "__iter__") and hasattr(value, "__len__"): + return len(value) > 0 and all(_isstr(v) for v in value) return False +def _is_empty_sized(value: Any) -> bool: + """ + Check for a sized, empty iterable (without consuming one-shot iterators). + """ + try: + return len(value) == 0 + except TypeError: + return False + + def _opts(**kwargs: bool) -> set[str]: return {k for k, v in kwargs.items() if v} @@ -274,11 +293,13 @@ def __getitem__(self, i: AxCallOrInt) -> int | str | tuple[float, float]: if callable(i): i = i(self) else: + size: int = self._ax.size + requested = i if i < 0: - i += self._ax.size - if i >= self._ax.size: + i += size + if i < 0 or i >= size: raise IndexError( - f"Out of range access, {i} is more than {self._ax.size}" + f"Out of range access, {requested} is out of range for axis with size {size}" ) assert not callable(i) return self.bin(i) @@ -371,11 +392,14 @@ def __init__( if options != {"underflow", "overflow"}: raise KeyError("Transform supplied, cannot change other options") - if ( - not isinstance(transform, AxisTransform) - and AxisTransform in transform.__bases__ # type: ignore[unreachable] - ): - raise TypeError(f"You must pass an instance, use {transform}()") + if not isinstance(transform, AxisTransform): + if isinstance(transform, type) and issubclass( # type: ignore[unreachable] + transform, AxisTransform + ): + msg = f"You must pass an instance, use {transform.__name__}()" + raise TypeError(msg) + msg = f"transform must be an AxisTransform instance, got {transform!r}" + raise TypeError(msg) ax = transform._produce(bins, start, stop) @@ -695,7 +719,7 @@ def index(self, value: float | str) -> int: Return the fractional index(es) given a value (or values) on the axis. """ - if _isstr(value): + if _isstr(value) or _is_empty_sized(value): return self._ax.index(value) # type: ignore[no-any-return] msg = f"index({value}) must be a string or iterable of strings for a StrCategory axis" @@ -823,7 +847,7 @@ def __getattr__(self, name: str) -> Any: return self.__class__(getattr(a, name) for a in self) def __dir__(self) -> list[str]: - names = dir(self.__class__) + dir("np.typing.NDArray[Any]") + names = set(dir(self.__class__)) | set(dir(np.ndarray)) | self._REDUCTIONS return sorted(n for n in names if not n.startswith("_")) def __call__(self, *args: Any, **kwargs: Any) -> Any: diff --git a/src/boost_histogram/histogram.py b/src/boost_histogram/histogram.py index 1067e51e..528b222b 100644 --- a/src/boost_histogram/histogram.py +++ b/src/boost_histogram/histogram.py @@ -4,7 +4,6 @@ import copy import enum import logging -import sys import threading import typing import warnings @@ -47,22 +46,6 @@ WeightedSum, ) -try: - from . import _core -except ImportError as err: - if "_core" not in str(err): - raise - - new_msg = "Did you forget to compile boost-histogram? Use CMake or scikit-build-core to build, see the readme." - - if sys.version_info >= (3, 11): - err.add_note(new_msg) - raise - - total_msg = f"{err}\n{new_msg}" - new_exception = type(err)(new_msg, name=err.name, path=err.path) - raise new_exception from err - # This is a StrEnum as defined in Python 3.11 class Kind(str, enum.Enum): @@ -376,13 +359,13 @@ def __init__( # Check for missed parenthesis or incorrect types if not isinstance(resolved_storage, Storage): - msg_storage = ( # type: ignore[unreachable] - "Passing in an initialized storage has been removed. Please add ()." - ) - msg_unknown = "Only storages allowed in storage argument" - raise KeyError( - msg_storage if issubclass(resolved_storage, Storage) else msg_unknown - ) + if isinstance(resolved_storage, type) and issubclass( # type: ignore[unreachable] + resolved_storage, Storage + ): + msg = f"Storages need to be initialized; use {resolved_storage.__name__}() instead. Please add ()." + raise TypeError(msg) + msg = f"Only storages allowed in storage argument, got {resolved_storage!r}" + raise TypeError(msg) # Allow a tuple to represent a regular axis axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type] @@ -910,22 +893,33 @@ def fill( }: raise RuntimeError("Mean histograms do not support threaded filling") - data: list[list[np.typing.NDArray[Any]] | list[str]] = [] + # If everything is scalar, there is only a single fill; threading would + # incorrectly repeat it, so fill directly instead. + if ( + all(isinstance(a, str) or np.ndim(a) == 0 for a in args_ars) + and (weight_ars is None or np.ndim(weight_ars) == 0) + and (sample_ars is None or np.ndim(sample_ars) == 0) + ): + self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) + return self + + data: list[list[Any]] = [] for a in args_ars: - if isinstance(a, str): + if isinstance(a, str) or np.ndim(a) == 0: + # Scalars broadcast against each thread's chunk data.append([a] * threads) else: - data.append(np.array_split(np.asarray(a), threads)) + data.append(list(np.array_split(np.asarray(a), threads))) weights: list[Any] - if weight is None or np.isscalar(weight): + if weight_ars is None or np.ndim(weight_ars) == 0: assert threads is not None weights = [weight_ars] * threads else: weights = np.array_split(np.asarray(weight_ars), threads) samples: list[Any] - if sample_ars is None or np.isscalar(sample_ars): + if sample_ars is None or np.ndim(sample_ars) == 0: assert threads is not None samples = [sample_ars] * threads else: @@ -1097,9 +1091,11 @@ def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: raise TypeError(f"Index {index} must be an integer, not float") if isinstance(index, SupportsIndex): - if abs(int(index)) >= self._hist.axis(axis).size: + idx = int(index) + size: int = self._hist.axis(axis).size + if not -size <= idx < size: raise IndexError("histogram index is out of range") - return int(index) % self._hist.axis(axis).size + return idx % size return index @@ -1362,7 +1358,9 @@ def __getitem__( pick_each[i] = ind.__index__() + ( 1 if self.axes[i].traits.underflow else 0 ) - case collections.abc.Sequence(): + # str/bytes are Sequences but not valid indices; they fall + # through to the IndexError below. + case collections.abc.Sequence() if not isinstance(ind, (str, bytes)): # type: ignore[unreachable] pick_set[i] = list(ind) case slice(start=start, stop=stop, step=step): reduced, new_slices, new_integrations = self._handle_slice( @@ -1604,7 +1602,11 @@ def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> No self.view()[self._compute_vectorized_index(indexes)] = np.asarray(value) return - in_array = np.asarray(value) + # A Histogram value must keep its flow bins; np.asarray() would call + # __array__, which drops them (returns view(flow=False)). + in_array = ( + value.view(flow=True) if isinstance(value, Histogram) else np.asarray(value) + ) view: Any = self.view(flow=True) value_shape: tuple[int, ...] diff --git a/tests/test_axes_object.py b/tests/test_axes_object.py index 9d02160b..3f8388a3 100644 --- a/tests/test_axes_object.py +++ b/tests/test_axes_object.py @@ -82,3 +82,15 @@ def test_axis_misconstuct(): with pytest.raises(TypeError): bh.axis.AxesTuple(inp[0]) + + +def test_array_tuple_dir(h): + # Issue #1143 (B15): __dir__ used dir() of a string literal + listing = dir(h.axes.centers) + + assert "sum" in listing + assert "broadcast" in listing + # ndarray attributes are forwarded, so they should be listed + assert "shape" in listing + assert "T" in listing + assert "upper" not in listing # no str methods diff --git a/tests/test_axis.py b/tests/test_axis.py index 7827ce06..582e177d 100644 --- a/tests/test_axis.py +++ b/tests/test_axis.py @@ -167,11 +167,10 @@ def test_init(self): with pytest.raises(TypeError): bh.axis.Regular(1, 1.0, 2.0, bad_keyword="ra") - with pytest.raises(AttributeError): + with pytest.raises(TypeError): bh.axis.Regular(1, 1.0, 2.0, transform=lambda _: 2) with pytest.raises(TypeError): bh.axis.Regular(1, 1.0, 2.0, transform=bh.axis.transform.Pow) - # TODO: These errors could be better def test_traits(self): STD_TRAITS = {"continuous": True, "ordered": True} @@ -910,3 +909,54 @@ def test_edges_centers_widths(self): assert_allclose(a.edges, [0.0, 1.0, 2.0]) assert_allclose(a.centers, [0.5, 1.5]) assert_allclose(a.widths, [1, 1]) + + +# Issue #1143 regression tests + + +def test_getitem_negative_wraparound(): + # Issue #1143 (B8b): ax[-size-1] used to wrap into the underflow bin + ax = bh.axis.Regular(10, 0, 1) + + assert ax[-10] == ax[0] + assert ax[-1] == ax[9] + + with pytest.raises(IndexError, match="-11"): + ax[-11] + with pytest.raises(IndexError, match="10"): + ax[10] + + +def test_index_empty_sequence(): + # Issue #1143 (B9): empty sequences are not strings + assert bh.axis.Regular(10, 0, 1).index([]).size == 0 + assert bh.axis.Regular(10, 0, 1).index(np.array([])).size == 0 + assert bh.axis.Integer(0, 5).index(()).size == 0 + + # Still rejects actual strings + with pytest.raises(TypeError): + bh.axis.Regular(10, 0, 1).index("hi") + with pytest.raises(TypeError): + bh.axis.Regular(10, 0, 1).index(["hi", "ho"]) + + # StrCategory still accepts empty and string input + ax = bh.axis.StrCategory(["a", "b"]) + assert ax.index([]).size == 0 + assert ax.index(np.array([])).size == 0 + assert ax.index("a") == 0 + assert_array_equal(ax.index(["b", "a"]), [1, 0]) + + with pytest.raises(TypeError): + ax.index([1, 2]) + + +def test_transform_validation(): + # Issue #1143 (B14c): clear TypeErrors for invalid transform arguments + with pytest.raises(TypeError, match=r"use Pow\(\)"): + bh.axis.Regular(10, 1, 100, transform=bh.axis.transform.Pow) + + with pytest.raises(TypeError, match="AxisTransform"): + bh.axis.Regular(10, 1, 100, transform="log") + + with pytest.raises(TypeError, match="AxisTransform"): + bh.axis.Regular(10, 1, 100, transform=42) diff --git a/tests/test_histogram.py b/tests/test_histogram.py index ff8bea9e..2084cffd 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -5,6 +5,7 @@ import operator import pickle import platform +import subprocess import sys import threading from collections import OrderedDict @@ -111,11 +112,12 @@ def test_fill_int_1d(): assert h[bh.underflow + 1] == 2 assert h[-1] == 3 + assert h[-3] == h[0] with pytest.raises(IndexError): h[3] with pytest.raises(IndexError): - h[-3] + h[-4] def test_fill_int_with_float_single_1d(): @@ -1719,3 +1721,28 @@ def test_allclose_different_continuous_traits(): h2 = bh.Histogram(bh.axis.Integer(0, 3)) assert not h1.allclose(h2) + + +@pytest.mark.skipif(sys.platform.startswith("emscripten"), reason="needs subprocess") +def test_missing_core_hint(): + # Issue #1143 (B10): if the compiled _core module is missing, the import + # error must include the "did you forget to compile" hint. This only works + # if boost_histogram/__init__.py imports _core before any submodule does. + code = """ +import sys +import importlib.abc + +class Blocker(importlib.abc.MetaPathFinder): + def find_spec(self, name, path=None, target=None): + if name == "boost_histogram._core": + raise ModuleNotFoundError(f"No module named {name!r}", name=name) + return None + +sys.meta_path.insert(0, Blocker()) +import boost_histogram +""" + result = subprocess.run( + [sys.executable, "-c", code], capture_output=True, text=True, check=False + ) + assert result.returncode != 0 + assert "Did you forget to compile boost-histogram?" in result.stderr diff --git a/tests/test_histogram_indexing.py b/tests/test_histogram_indexing.py index 712ae71e..8d0003bb 100644 --- a/tests/test_histogram_indexing.py +++ b/tests/test_histogram_indexing.py @@ -693,3 +693,28 @@ def test_vectorized_get_rejects_unsupported(): # A categorical pick list cannot be combined with array indexing with pytest.raises(IndexError, match="integer arrays"): h[[0, 1], arr] + + +def test_negative_size_index(): + # Issue #1143 (B8a): -size is a valid Python index for bin 0 + h = bh.Histogram(bh.axis.Regular(10, 0, 1)) + h[0] = 7 + + assert h[-10] == 7 + assert h[-10] == h[0] + + with pytest.raises(IndexError): + h[-11] + with pytest.raises(IndexError): + h[10] + + +def test_string_index_raises(): + # Issue #1143 (B14a): a plain string is not a valid index (use bh.loc) + h = bh.Histogram(bh.axis.StrCategory(["a", "b"])) + h.fill(["a", "a", "b"]) + + with pytest.raises(IndexError, match="locator protocol"): + h["a"] + + assert h[bh.loc("a")] == 2 diff --git a/tests/test_histogram_init.py b/tests/test_histogram_init.py index ab90247e..eaa47690 100644 --- a/tests/test_histogram_init.py +++ b/tests/test_histogram_init.py @@ -134,3 +134,12 @@ def test_make_selection(ax, storage): histogram = bh.Histogram(ax, ax, storage=storage()) assert isinstance(histogram, bh.Histogram) # TODO: Make this test do something useful + + +def test_init_bad_storage(): + # Issue #1143 (B14b): clear TypeErrors for invalid storage arguments + with pytest.raises(TypeError, match="Only storages allowed"): + bh.Histogram(bh.axis.Regular(10, 0, 1), storage=42) + + with pytest.raises(TypeError, match=r"use Double\(\)"): + bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.Double) diff --git a/tests/test_histogram_set.py b/tests/test_histogram_set.py index 56702d31..f798ca7d 100644 --- a/tests/test_histogram_set.py +++ b/tests/test_histogram_set.py @@ -202,3 +202,42 @@ def test_vectorized_set_multicell(): i1 = np.array([0, 3]) h[i0, i1] = 7.0 np.testing.assert_array_equal(h.view()[:, i0, i1], 7.0) + + +def test_set_histogram_with_flow(): + # Issue #1143 (B1): np.asarray(value) dropped a Histogram's flow bins + h = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2 = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2.view(flow=True)[:] = [100, 1, 2, 3, 200] + + h[:] = h2 + assert_array_equal(h.view(flow=True), [100, 1, 2, 3, 200]) + + +def test_set_histogram_with_flow_2d(): + h = bh.Histogram(bh.axis.Regular(2, 0, 1), bh.axis.Regular(2, 0, 1)) + h2 = bh.Histogram(bh.axis.Regular(2, 0, 1), bh.axis.Regular(2, 0, 1)) + h2.view(flow=True)[...] = np.arange(16).reshape(4, 4) + + h[:, :] = h2 + assert_array_equal(h.view(flow=True), np.arange(16).reshape(4, 4)) + + +def test_set_histogram_without_flow(): + # A flow-less histogram value still sets the inner bins only + h = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h3 = bh.Histogram(bh.axis.Regular(3, 0, 1, underflow=False, overflow=False)) + h3[:] = [1, 2, 3] + + h[0:3] = h3 + assert_array_equal(h.view(flow=True), [0, 1, 2, 3, 0]) + + +def test_set_histogram_flow_mismatch(): + # Cannot set flow bins of a slice that does not include them + h = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2 = bh.Histogram(bh.axis.Regular(3, 0, 1)) + h2.view(flow=True)[:] = [100, 1, 2, 3, 200] + + with pytest.raises(ValueError): + h[0:3] = h2 diff --git a/tests/test_threaded_fill.py b/tests/test_threaded_fill.py index d60841f2..52934607 100644 --- a/tests/test_threaded_fill.py +++ b/tests/test_threaded_fill.py @@ -92,6 +92,42 @@ def test_threaded_weight_storage(threads): assert_almost_equal(hist_1.view().variance, hist_2.view().variance) +@pytest.mark.parametrize("threads", [2, 4, 7], ids=lambda x: f"threads={x}") +def test_threaded_scalar_broadcast(threads): + # Issue #1143 (B5): scalar positional args used to crash np.array_split + hist_1 = bh.Histogram(bh.axis.Regular(10, 0, 1), bh.axis.Regular(10, 0, 1)) + hist_2 = hist_1.copy() + + y = np.random.rand(13) + hist_1.fill(0.5, y) + hist_2.fill(0.5, y, threads=threads) + + assert_array_equal(hist_1.view(flow=True), hist_2.view(flow=True)) + + +@pytest.mark.parametrize("threads", [2, 4, 7], ids=lambda x: f"threads={x}") +def test_threaded_scalar_weight(threads): + # Issue #1143 (B5): 0-d array weights are not np.isscalar but must broadcast + x = np.random.rand(13) + + hist_1 = bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.Weight()) + hist_2 = hist_1.copy() + + hist_1.fill(x, weight=np.array(2.0)) + hist_2.fill(x, weight=np.array(2.0), threads=threads) + + assert_almost_equal(hist_1.view().value, hist_2.view().value) + assert_almost_equal(hist_1.view().variance, hist_2.view().variance) + + +@pytest.mark.parametrize("threads", [2, 4]) +def test_threaded_all_scalar(threads): + # All-scalar fills must fill exactly once, not once per thread + hist = bh.Histogram(bh.axis.Regular(10, 0, 1)) + hist.fill(0.5, threads=threads) + assert hist.sum() == 1 + + def test_no_profile(): hist = bh.Histogram(bh.axis.Regular(10, 0, 1), storage=bh.storage.Mean()) hist.fill([1, 1], sample=[1, 1])