Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions loopy/type_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,47 @@ def map_quotient(self, expr: p.Quotient):
else:
return self.combine([n_dtype_set, d_dtype_set])

def _map_int_div_modulo(self, expr: p.FloorDiv | p.Remainder):
n_dtype_set = self.rec(expr.numerator)
d_dtype_set = self.rec(expr.denominator)

if not (n_dtype_set and d_dtype_set):
return cast("list[NumpyType]", [])

n_dtype = cast("NumpyType", n_dtype_set[0]).dtype
d_dtype = cast("NumpyType", d_dtype_set[0]).dtype
num = (
np.empty(0, dtype=n_dtype)
if not is_integer(expr.numerator)
else expr.numerator
Comment on lines +443 to +444
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NumPy no longer does value-based type promotion AFAIK (as of 2.0), so IMO this `if` isn't necessary. Just use 1/1 unconditionally.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we can avoid the conditional.

>>> ary_u64 = np.empty(0, dtype=np.uint64)
>>> (ary_u64 // 2).dtype
dtype('uint64')
>>> (ary_u64 // np.empty(0, np.int32)).dtype  # what we would get without the conditional
dtype('float64')

)
denom = (
np.empty(0, dtype=d_dtype)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a risk here of using a zero denominator in a carried-out calculation?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of: 3cdca14?

if not is_integer(expr.denominator)
else expr.denominator
)
denom = (
cast("int | np.integer", denom + 1)
if is_integer(denom) and denom == 0
else denom
) # avoid divide by zero.

if is_integer(num) and is_integer(denom):
return self.rec(num // denom)

floor_div_np = num // denom
assert isinstance(floor_div_np, np.ndarray)

return [NumpyType(floor_div_np.dtype)]

# Type inference for a floor-division node (``p.FloorDiv``): the work is
# delegated to ``_map_int_div_modulo``, which accepts both ``p.FloorDiv``
# and ``p.Remainder`` expressions, and its result is returned unchanged.
@override
def map_floor_div(self, expr: p.FloorDiv):
return self._map_int_div_modulo(expr)

# Type inference for a remainder node (``p.Remainder``): the work is
# delegated to ``_map_int_div_modulo``, which accepts both ``p.FloorDiv``
# and ``p.Remainder`` expressions, and its result is returned unchanged.
@override
def map_remainder(self, expr: p.Remainder):
return self._map_int_div_modulo(expr)

@override
def map_constant(self, expr: object):
if isinstance(expr, np.generic):
Expand Down
14 changes: 14 additions & 0 deletions test/test_loopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3733,6 +3733,20 @@ def test_type_cast_parse_stringify_roundtrip():
assert expr == parsed


def test_floor_div_modulo_with_uint_index():
    """Check that code generation succeeds for ``//`` and ``%`` applied to
    entries of a ``uint64`` array used as subscripts.

    See <https://github.com/inducer/loopy/issues/999>
    """
    index_map = lp.GlobalArg("map", dtype=np.uint64, shape=lp.auto)
    output = lp.GlobalArg("a", dtype=np.float64, shape=(10, 4))

    knl = lp.make_kernel(
        "{[i]: 0<=i<10}",
        "a[map[i] // 2, map[i] % 35] = i",
        [index_map, output],
    )

    # Only successful code generation is checked; the kernel is never run.
    generated = lp.generate_code_v2(knl)
    generated.device_code()


if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
Expand Down
Loading