From 69fba2c4d160aaca93adbf88c71fe5aa2e756c68 Mon Sep 17 00:00:00 2001 From: Dmitry Nikolaev <139769634+dnikolaev-amd@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:59:50 +0200 Subject: [PATCH] [ROCm] skip two gaussian_blur tests on gfx90a Skip two gaussian_blur CUDA tests on AMD gfx90a (MI200, MI250) that fail due to small numerical differences with reference values. Other GPUs and CPU paths are unchanged. 1. `test_transforms_tensor.py::test_gaussian_blur[3-meth_kwargs4-cuda]` Failure: Batched GaussianBlur vs per-image calls disagree by 1 on a single uint8 pixel after rounding from fp32. Cause: MIOpen conv2d returns batched and single-image results that differ by 1 float32 ULP at a half-integer (batched: 188.50000000, single: 188.50001526), so rounding gives 188 vs 189. Not a transform logic bug. 2. `test_functional_tensor.py::test_gaussian_blur[gaussian_blur-sigma3-ksize2-dt3-large-cuda]` Failure: Output exceeds atol=1.0 vs stored OpenCV reference (max diff 1.125 at known pixels). Cause: The stored fp16 OpenCV reference value appears to be incorrect.
CPU (174.0) and gfx90a (173.875) both differ from OpenCV (175.0) but agree with each other within ~0.125 (1 fp16 ULP) Add gfx90a + ROCm + PYTEST_CURRENT_TEST guards to skip failed tests --- test/test_functional_tensor.py | 12 ++++++++++++ test/test_transforms_tensor.py | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 7d491372b77..aa124e7f20f 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -1009,6 +1009,18 @@ def test_crop(device, top, left, height, width): @pytest.mark.parametrize("fn", [F.gaussian_blur, torch.jit.script(F.gaussian_blur)]) def test_gaussian_blur(device, image_size, dt, ksize, sigma, fn): + if all( + [ + device == "cuda", + torch.version.hip is not None, + torch.cuda.is_available() and "gfx90a" in torch.cuda.get_device_properties().gcnArchName, + "[gaussian_blur-sigma3-ksize2-dt3-large-cuda]" in os.environ.get("PYTEST_CURRENT_TEST", "") + ] + ): + pytest.skip( + "Skipped on gfx90a because fp16 gaussian_blur differs from stored OpenCV reference by more then atol+ULP" + ) + # true_cv2_results = { # # np_img = np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3)) # # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.8) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index eac52dafc17..0294ac43a5e 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -847,6 +847,18 @@ def test_gaussian_blur(device, channels, meth_kwargs): ): pytest.skip("Fails on Windows, see https://github.com/pytorch/vision/issues/5464") + if all( + [ + device == "cuda", + torch.version.hip is not None, + torch.cuda.is_available() and "gfx90a" in torch.cuda.get_device_properties().gcnArchName, + "test_gaussian_blur[3-meth_kwargs4-cuda]" in os.environ.get("PYTEST_CURRENT_TEST", "") + ] + ): + pytest.skip( + "Skipped on gfx90a because of uint8 rounding difference for batched and single conv2d" + ) + tol = 
1.0 + 1e-10 torch.manual_seed(12) _test_class_op(