diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 7bbee772e..f32dd14e3 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -464,8 +464,59 @@ def map_if(self, expr: p.If, type_context: TypeContext): self.rec(expr.else_, type_context, result_type), ) + def _is_mixed_sign_integer_pair( + self, + dtype1: LoopyType, + dtype2: LoopyType, + ) -> bool: + """Return True if *dtype1* and *dtype2* are integers of opposite + signedness (one signed, one unsigned).""" + return ( + isinstance(dtype1, NumpyType) + and isinstance(dtype2, NumpyType) + and dtype1.is_integral() + and dtype2.is_integral() + and dtype1.numpy_dtype.kind != dtype2.numpy_dtype.kind + ) + + def _common_signed_dtype( + self, + dtype1: np.dtype, + dtype2: np.dtype, + ) -> NumpyType: + """Return the narrowest signed integer dtype that can hold all values + of both *dtype1* and *dtype2* (which must be integer dtypes). + + Used to pick a common cast target when comparing or combining + mixed-sign integers, so that C's implicit unsigned-promotion rule + cannot give wrong results. + """ + common = np.result_type(dtype1, dtype2) + if common.kind == "i": + return NumpyType(common) + # int64 vs uint64: numpy says float64; fall back to int64 (the widest + # standard signed type) and accept that uint64 values above INT64_MAX + # will not be represented correctly. + return NumpyType(np.dtype(np.int64)) + @override def map_comparison(self, expr: p.Comparison, type_context: TypeContext): + left_dtype = self.infer_type(cast("ArithmeticExpression", expr.left)) + right_dtype = self.infer_type(cast("ArithmeticExpression", expr.right)) + + if self._is_mixed_sign_integer_pair(left_dtype, right_dtype): + # C would silently convert the signed operand to unsigned (giving + # wrong results for negative values). Cast both to a common + # signed type to get Python-like semantics. + assert isinstance(left_dtype, NumpyType) + assert isinstance(right_dtype, NumpyType) + common_type = self._common_signed_dtype( + left_dtype.numpy_dtype, right_dtype.numpy_dtype) + return type(expr)( + self.rec(expr.left, "i", common_type), + expr.operator, + self.rec(expr.right, "i", common_type)) + inner_type_context = dtype_to_type_context( self.kernel.target, self.infer_type( @@ -477,6 +528,63 @@ def map_comparison(self, expr: p.Comparison, type_context: TypeContext): expr.operator, self.rec(expr.right, inner_type_context)) + @override + def map_sum(self, expr: p.Sum, type_context: TypeContext): + result_dtype = self.infer_type(expr) + + if (isinstance(result_dtype, NumpyType) + and result_dtype.numpy_dtype.kind == "i" + and result_dtype.is_integral()): + # If any child is an unsigned integer, cast it to the signed + # result type before addition. Without this, C's usual arithmetic + # conversions would let the unsigned operand "win" and silently + # turn the computation into unsigned arithmetic. + new_children = [] + modified = False + for child in expr.children: + child_dtype = self.infer_type( + cast("ArithmeticExpression", child)) + if (isinstance(child_dtype, NumpyType) + and child_dtype.numpy_dtype.kind == "u"): + new_children.append( + self.rec(child, type_context, result_dtype)) + modified = True + else: + new_children.append(self.rec(child, type_context)) + if modified: + return type(expr)(tuple(new_children)) + + return type(expr)(tuple( + self.rec(child, type_context) for child in expr.children)) + + @override + def map_product(self, expr: p.Product, type_context: TypeContext): + result_dtype = self.infer_type(expr) + + if (isinstance(result_dtype, NumpyType) + and result_dtype.numpy_dtype.kind == "i" + and result_dtype.is_integral()): + # Same rationale as map_sum: cast unsigned factors to the signed + # result type so the C multiplication is performed in signed + # arithmetic. + new_children = [] + modified = False + for child in expr.children: + child_dtype = self.infer_type( + cast("ArithmeticExpression", child)) + if (isinstance(child_dtype, NumpyType) + and child_dtype.numpy_dtype.kind == "u"): + new_children.append( + self.rec(child, type_context, result_dtype)) + modified = True + else: + new_children.append(self.rec(child, type_context)) + if modified: + return type(expr)(tuple(new_children)) + + return type(expr)(tuple( + self.rec(child, type_context) for child in expr.children)) + @override def map_type_cast(self, expr: TypeCast, diff --git a/test/test_c_execution.py b/test/test_c_execution.py index 255a78c71..07f554530 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -366,6 +366,133 @@ def test_scalar_global_args(): assert out == (n*(n-1)/2) # noqa: RUF069 +@pytest.mark.parametrize("signed_dtype,unsigned_dtype", [ + (np.int8, np.uint8), + (np.int16, np.uint16), + (np.int32, np.uint32), + (np.int64, np.uint64), +]) +def test_mixed_sign_comparison(signed_dtype, unsigned_dtype): + """Mixed-sign comparisons must follow Python semantics (not C's + implicit unsigned-promotion rule). The classic C footgun is: + + int x = -1; unsigned int y = 1; + if (x < y) { ... } // NOT taken — x becomes UINT_MAX + + Loopy should generate explicit casts so the branch IS taken. + """ + t_unit = lp.make_kernel( + "{:}", + """ + lt_result = if(sv < uv, 1, 0) + le_result = if(sv <= uv, 1, 0) + gt_result = if(sv > uv, 1, 0) + ge_result = if(sv >= uv, 1, 0) + eq_result = if(sv == uv, 1, 0) + ne_result = if(sv != uv, 1, 0) + """, + [ + lp.ValueArg("sv", signed_dtype), + lp.ValueArg("uv", unsigned_dtype), + lp.GlobalArg("lt_result, le_result, gt_result, " + "ge_result, eq_result, ne_result", + np.int32, shape=lp.auto), + ], + target=lp.ExecutableCTarget(), + ) + t_unit = lp.set_options(t_unit, return_dict=True) + + # --- sv = -1, uv = 1 ------------------------------------------------------- + # Python: -1 < 1 → True, -1 <= 1 → True, -1 > 1 → False, + # -1 >= 1 → False, -1 == 1 → False, -1 != 1 → True + sv = signed_dtype(-1) + uv = unsigned_dtype(1) + _evt, out = t_unit(sv=sv, uv=uv) + assert out["lt_result"][()] == 1, f"{sv} < {uv} should be True" + assert out["le_result"][()] == 1, f"{sv} <= {uv} should be True" + assert out["gt_result"][()] == 0, f"{sv} > {uv} should be False" + assert out["ge_result"][()] == 0, f"{sv} >= {uv} should be False" + assert out["eq_result"][()] == 0, f"{sv} == {uv} should be False" + assert out["ne_result"][()] == 1, f"{sv} != {uv} should be True" + + # --- sv = 1, uv = 1 -------------------------------------------------------- + sv = signed_dtype(1) + uv = unsigned_dtype(1) + _evt, out = t_unit(sv=sv, uv=uv) + assert out["lt_result"][()] == 0, f"{sv} < {uv} should be False" + assert out["le_result"][()] == 1, f"{sv} <= {uv} should be True" + assert out["gt_result"][()] == 0, f"{sv} > {uv} should be False" + assert out["ge_result"][()] == 1, f"{sv} >= {uv} should be True" + assert out["eq_result"][()] == 1, f"{sv} == {uv} should be True" + assert out["ne_result"][()] == 0, f"{sv} != {uv} should be False" + + # --- sv = 2, uv = 1 -------------------------------------------------------- + sv = signed_dtype(2) + uv = unsigned_dtype(1) + _evt, out = t_unit(sv=sv, uv=uv) + assert out["lt_result"][()] == 0, f"{sv} < {uv} should be False" + assert out["le_result"][()] == 0, f"{sv} <= {uv} should be False" + assert out["gt_result"][()] == 1, f"{sv} > {uv} should be True" + assert out["ge_result"][()] == 1, f"{sv} >= {uv} should be True" + assert out["eq_result"][()] == 0, f"{sv} == {uv} should be False" + assert out["ne_result"][()] == 1, f"{sv} != {uv} should be True" + + +@pytest.mark.parametrize("signed_dtype,unsigned_dtype", [ + (np.int8, np.uint8), + (np.int16, np.uint16), + (np.int32, np.uint32), + (np.int64, np.uint64), +]) +def test_mixed_sign_subtraction(signed_dtype, unsigned_dtype): + """Mixed-sign subtraction must follow Python semantics. + + In C, ``(int32_t)-1 - (uint32_t)1`` is computed as + ``(uint32_t)UINT_MAX - 1 = 4294967294``, which when cast to int64 + gives 4294967294, not -2. Loopy must insert explicit casts so the + result is -2. + """ + # result dtype: the narrowest signed type wide enough to hold both + result_dtype = np.result_type(signed_dtype, unsigned_dtype) + if result_dtype.kind != "i": + # int64 vs uint64: numpy promotes to float64; use int64 instead + result_dtype = np.dtype(np.int64) + + t_unit = lp.make_kernel( + "{:}", + """ + diff_sv_uv = sv - uv + diff_uv_sv = uv - sv + """, + [ + lp.ValueArg("sv", signed_dtype), + lp.ValueArg("uv", unsigned_dtype), + lp.GlobalArg("diff_sv_uv", result_dtype, shape=lp.auto), + lp.GlobalArg("diff_uv_sv", result_dtype, shape=lp.auto), + ], + target=lp.ExecutableCTarget(), + ) + t_unit = lp.set_options(t_unit, return_dict=True) + + # --- sv = -1, uv = 1 → expected -2 / 2 ------------------------------------ + sv = signed_dtype(-1) + uv = unsigned_dtype(1) + _evt, out = t_unit(sv=sv, uv=uv) + assert out["diff_sv_uv"][()] == -2, \ + f"{sv} - {uv} should be -2, got {out['diff_sv_uv'][()]}" + assert out["diff_uv_sv"][()] == 2, \ + f"{uv} - {sv} should be 2, got {out['diff_uv_sv'][()]}" + + # --- sv = 5, uv = 3 → expected 2 / -2 ------------------------------------- + sv = signed_dtype(5) + uv = unsigned_dtype(3) + _evt, out = t_unit(sv=sv, uv=uv) + assert out["diff_sv_uv"][()] == 2, \ + f"{sv} - {uv} should be 2, got {out['diff_sv_uv'][()]}" + assert out["diff_uv_sv"][()] == -2, \ + f"{uv} - {sv} should be -2, got {out['diff_uv_sv'][()]}" + + if __name__ == "__main__": import sys if len(sys.argv) > 1: