Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/boost_histogram/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
from __future__ import annotations

import sys

try:
from . import _core # noqa: F401
except ImportError as err:
if "_core" not in str(err):
raise

new_msg = "Did you forget to compile boost-histogram? Use CMake or scikit-build-core to build, see the readme."

if sys.version_info >= (3, 11):
err.add_note(new_msg)
raise

total_msg = f"{err}\n{new_msg}"
new_exception = type(err)(total_msg, name=err.name, path=err.path)
raise new_exception from err

from . import accumulators, axis, numpy, storage
from .histogram import Histogram, IndexingExpr, Kind
from .tag import ( # pylint: disable=redefined-builtin
Expand Down
50 changes: 37 additions & 13 deletions src/boost_histogram/axis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,35 @@ def __dir__() -> list[str]:

def _isstr(value: Any) -> bool:
"""
Check to see if this is a stringlike or a (nested) iterable of stringlikes
Check to see if this is a stringlike or a non-empty (nested) iterable of
stringlikes. Empty iterables are not considered stringlike. Iterables
without a length (one-shot iterators) are not inspected, since checking
them would consume them.
"""

if isinstance(value, (str, bytes)):
return True
if hasattr(value, "__iter__"):
return all(_isstr(v) for v in value)
if isinstance(value, np.ndarray):
if value.dtype.kind in {"S", "U"}:
return True
if value.dtype.kind == "O":
return value.size > 0 and all(_isstr(v) for v in value.flat)
return False
if hasattr(value, "__iter__") and hasattr(value, "__len__"):
return len(value) > 0 and all(_isstr(v) for v in value)
return False


def _is_empty_sized(value: Any) -> bool:
"""
Check for a sized, empty iterable (without consuming one-shot iterators).
"""
try:
return len(value) == 0
except TypeError:
return False


def _opts(**kwargs: bool) -> set[str]:
return {k for k, v in kwargs.items() if v}

Expand Down Expand Up @@ -274,11 +293,13 @@ def __getitem__(self, i: AxCallOrInt) -> int | str | tuple[float, float]:
if callable(i):
i = i(self)
else:
size: int = self._ax.size
requested = i
if i < 0:
i += self._ax.size
if i >= self._ax.size:
i += size
if i < 0 or i >= size:
raise IndexError(
f"Out of range access, {i} is more than {self._ax.size}"
f"Out of range access, {requested} is out of range for axis with size {size}"
)
assert not callable(i)
return self.bin(i)
Expand Down Expand Up @@ -371,11 +392,14 @@ def __init__(
if options != {"underflow", "overflow"}:
raise KeyError("Transform supplied, cannot change other options")

if (
not isinstance(transform, AxisTransform)
and AxisTransform in transform.__bases__ # type: ignore[unreachable]
):
raise TypeError(f"You must pass an instance, use {transform}()")
if not isinstance(transform, AxisTransform):
if isinstance(transform, type) and issubclass( # type: ignore[unreachable]
transform, AxisTransform
):
msg = f"You must pass an instance, use {transform.__name__}()"
raise TypeError(msg)
msg = f"transform must be an AxisTransform instance, got {transform!r}"
raise TypeError(msg)

ax = transform._produce(bins, start, stop)

Expand Down Expand Up @@ -695,7 +719,7 @@ def index(self, value: float | str) -> int:
Return the fractional index(es) given a value (or values) on the axis.
"""

if _isstr(value):
if _isstr(value) or _is_empty_sized(value):
return self._ax.index(value) # type: ignore[no-any-return]

msg = f"index({value}) must be a string or iterable of strings for a StrCategory axis"
Expand Down Expand Up @@ -823,7 +847,7 @@ def __getattr__(self, name: str) -> Any:
return self.__class__(getattr(a, name) for a in self)

def __dir__(self) -> list[str]:
names = dir(self.__class__) + dir("np.typing.NDArray[Any]")
names = set(dir(self.__class__)) | set(dir(np.ndarray)) | self._REDUCTIONS
return sorted(n for n in names if not n.startswith("_"))

def __call__(self, *args: Any, **kwargs: Any) -> Any:
Expand Down
68 changes: 35 additions & 33 deletions src/boost_histogram/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import copy
import enum
import logging
import sys
import threading
import typing
import warnings
Expand Down Expand Up @@ -47,22 +46,6 @@
WeightedSum,
)

try:
from . import _core
except ImportError as err:
if "_core" not in str(err):
raise

new_msg = "Did you forget to compile boost-histogram? Use CMake or scikit-build-core to build, see the readme."

if sys.version_info >= (3, 11):
err.add_note(new_msg)
raise

total_msg = f"{err}\n{new_msg}"
new_exception = type(err)(new_msg, name=err.name, path=err.path)
raise new_exception from err


# This is a StrEnum as defined in Python 3.11
class Kind(str, enum.Enum):
Expand Down Expand Up @@ -376,13 +359,13 @@ def __init__(

# Check for missed parenthesis or incorrect types
if not isinstance(resolved_storage, Storage):
msg_storage = ( # type: ignore[unreachable]
"Passing in an initialized storage has been removed. Please add ()."
)
msg_unknown = "Only storages allowed in storage argument"
raise KeyError(
msg_storage if issubclass(resolved_storage, Storage) else msg_unknown
)
if isinstance(resolved_storage, type) and issubclass( # type: ignore[unreachable]
resolved_storage, Storage
):
msg = f"Storages need to be initialized; use {resolved_storage.__name__}() instead. Please add ()."
raise TypeError(msg)
msg = f"Only storages allowed in storage argument, got {resolved_storage!r}"
raise TypeError(msg)

# Allow a tuple to represent a regular axis
axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type]
Expand Down Expand Up @@ -910,22 +893,33 @@ def fill(
}:
raise RuntimeError("Mean histograms do not support threaded filling")

data: list[list[np.typing.NDArray[Any]] | list[str]] = []
# If everything is scalar, there is only a single fill; threading would
# incorrectly repeat it, so fill directly instead.
if (
all(isinstance(a, str) or np.ndim(a) == 0 for a in args_ars)
and (weight_ars is None or np.ndim(weight_ars) == 0)
and (sample_ars is None or np.ndim(sample_ars) == 0)
):
self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars)
return self

data: list[list[Any]] = []
for a in args_ars:
if isinstance(a, str):
if isinstance(a, str) or np.ndim(a) == 0:
# Scalars broadcast against each thread's chunk
data.append([a] * threads)
else:
data.append(np.array_split(np.asarray(a), threads))
data.append(list(np.array_split(np.asarray(a), threads)))

weights: list[Any]
if weight is None or np.isscalar(weight):
if weight_ars is None or np.ndim(weight_ars) == 0:
assert threads is not None
weights = [weight_ars] * threads
else:
weights = np.array_split(np.asarray(weight_ars), threads)

samples: list[Any]
if sample_ars is None or np.isscalar(sample_ars):
if sample_ars is None or np.ndim(sample_ars) == 0:
assert threads is not None
samples = [sample_ars] * threads
else:
Expand Down Expand Up @@ -1097,9 +1091,11 @@ def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing:
raise TypeError(f"Index {index} must be an integer, not float")

if isinstance(index, SupportsIndex):
if abs(int(index)) >= self._hist.axis(axis).size:
idx = int(index)
size: int = self._hist.axis(axis).size
if not -size <= idx < size:
raise IndexError("histogram index is out of range")
return int(index) % self._hist.axis(axis).size
return idx % size

return index

Expand Down Expand Up @@ -1362,7 +1358,9 @@ def __getitem__(
pick_each[i] = ind.__index__() + (
1 if self.axes[i].traits.underflow else 0
)
case collections.abc.Sequence():
# str/bytes are Sequences but not valid indices; they fall
# through to the IndexError below.
case collections.abc.Sequence() if not isinstance(ind, (str, bytes)): # type: ignore[unreachable]
pick_set[i] = list(ind)
case slice(start=start, stop=stop, step=step):
reduced, new_slices, new_integrations = self._handle_slice(
Expand Down Expand Up @@ -1604,7 +1602,11 @@ def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> No
self.view()[self._compute_vectorized_index(indexes)] = np.asarray(value)
return

in_array = np.asarray(value)
# A Histogram value must keep its flow bins; np.asarray() would call
# __array__, which drops them (returns view(flow=False)).
in_array = (
value.view(flow=True) if isinstance(value, Histogram) else np.asarray(value)
)
view: Any = self.view(flow=True)

value_shape: tuple[int, ...]
Expand Down
12 changes: 12 additions & 0 deletions tests/test_axes_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,15 @@ def test_axis_misconstuct():

with pytest.raises(TypeError):
bh.axis.AxesTuple(inp[0])


def test_array_tuple_dir(h):
# Issue #1143 (B15): __dir__ used dir() of a string literal
listing = dir(h.axes.centers)

assert "sum" in listing
assert "broadcast" in listing
# ndarray attributes are forwarded, so they should be listed
assert "shape" in listing
assert "T" in listing
assert "upper" not in listing # no str methods
54 changes: 52 additions & 2 deletions tests/test_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,10 @@ def test_init(self):

with pytest.raises(TypeError):
bh.axis.Regular(1, 1.0, 2.0, bad_keyword="ra")
with pytest.raises(AttributeError):
with pytest.raises(TypeError):
bh.axis.Regular(1, 1.0, 2.0, transform=lambda _: 2)
with pytest.raises(TypeError):
bh.axis.Regular(1, 1.0, 2.0, transform=bh.axis.transform.Pow)
# TODO: These errors could be better

def test_traits(self):
STD_TRAITS = {"continuous": True, "ordered": True}
Expand Down Expand Up @@ -910,3 +909,54 @@ def test_edges_centers_widths(self):
assert_allclose(a.edges, [0.0, 1.0, 2.0])
assert_allclose(a.centers, [0.5, 1.5])
assert_allclose(a.widths, [1, 1])


# Issue #1143 regression tests


def test_getitem_negative_wraparound():
# Issue #1143 (B8b): ax[-size-1] used to wrap into the underflow bin
ax = bh.axis.Regular(10, 0, 1)

assert ax[-10] == ax[0]
assert ax[-1] == ax[9]

with pytest.raises(IndexError, match="-11"):
ax[-11]
with pytest.raises(IndexError, match="10"):
ax[10]


def test_index_empty_sequence():
# Issue #1143 (B9): empty sequences are not strings
assert bh.axis.Regular(10, 0, 1).index([]).size == 0
assert bh.axis.Regular(10, 0, 1).index(np.array([])).size == 0
assert bh.axis.Integer(0, 5).index(()).size == 0

# Still rejects actual strings
with pytest.raises(TypeError):
bh.axis.Regular(10, 0, 1).index("hi")
with pytest.raises(TypeError):
bh.axis.Regular(10, 0, 1).index(["hi", "ho"])

# StrCategory still accepts empty and string input
ax = bh.axis.StrCategory(["a", "b"])
assert ax.index([]).size == 0
assert ax.index(np.array([])).size == 0
assert ax.index("a") == 0
assert_array_equal(ax.index(["b", "a"]), [1, 0])

with pytest.raises(TypeError):
ax.index([1, 2])


def test_transform_validation():
# Issue #1143 (B14c): clear TypeErrors for invalid transform arguments
with pytest.raises(TypeError, match=r"use Pow\(\)"):
bh.axis.Regular(10, 1, 100, transform=bh.axis.transform.Pow)

with pytest.raises(TypeError, match="AxisTransform"):
bh.axis.Regular(10, 1, 100, transform="log")

with pytest.raises(TypeError, match="AxisTransform"):
bh.axis.Regular(10, 1, 100, transform=42)
29 changes: 28 additions & 1 deletion tests/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import operator
import pickle
import platform
import subprocess
import sys
import threading
from collections import OrderedDict
Expand Down Expand Up @@ -111,11 +112,12 @@ def test_fill_int_1d():
assert h[bh.underflow + 1] == 2

assert h[-1] == 3
assert h[-3] == h[0]

with pytest.raises(IndexError):
h[3]
with pytest.raises(IndexError):
h[-3]
h[-4]


def test_fill_int_with_float_single_1d():
Expand Down Expand Up @@ -1719,3 +1721,28 @@ def test_allclose_different_continuous_traits():
h2 = bh.Histogram(bh.axis.Integer(0, 3))

assert not h1.allclose(h2)


@pytest.mark.skipif(sys.platform.startswith("emscripten"), reason="needs subprocess")
def test_missing_core_hint():
# Issue #1143 (B10): if the compiled _core module is missing, the import
# error must include the "did you forget to compile" hint. This only works
# if boost_histogram/__init__.py imports _core before any submodule does.
code = """
import sys
import importlib.abc

class Blocker(importlib.abc.MetaPathFinder):
def find_spec(self, name, path=None, target=None):
if name == "boost_histogram._core":
raise ModuleNotFoundError(f"No module named {name!r}", name=name)
return None

sys.meta_path.insert(0, Blocker())
import boost_histogram
"""
result = subprocess.run(
[sys.executable, "-c", code], capture_output=True, text=True, check=False
)
assert result.returncode != 0
assert "Did you forget to compile boost-histogram?" in result.stderr
Loading