Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
f4fc92e
add rocjpeg support
xytpai Jan 8, 2026
a371c3e
update rocjpeg utils
xytpai Jan 8, 2026
e4c4fd0
rm cout
xytpai Jan 8, 2026
3d9041c
refine code
xytpai Jan 16, 2026
1d29986
rm unused file
xytpai Jan 16, 2026
15d8f11
refine code 2
xytpai Jan 16, 2026
fb2f9fc
Merge branch 'main' into xyt/rocjpeg_upstream
xytpai Jan 27, 2026
bc8c702
Merge branch 'main' into xyt/rocjpeg_upstream
zy1git Jan 28, 2026
173c23d
Merge branch 'main' into xyt/rocjpeg_upstream
zy1git Jan 28, 2026
09c589d
Merge branch 'main' into xyt/rocjpeg_upstream
xytpai Feb 9, 2026
8d4f6ff
Merge branch 'main' into xyt/rocjpeg_upstream
xytpai Jun 12, 2026
b68f0ef
full format support
xytpai Jun 13, 2026
e113fcc
remove stream dependency
xytpai Jun 13, 2026
85b55f1
make batch-size dynamic
xytpai Jun 13, 2026
dd23f0e
resolve remaining comments
xytpai Jun 13, 2026
722a4af
[ROCm] Clean up rocJPEG decode and share GPU JPEG scaffolding (#2)
jeffdaily Jun 18, 2026
a319739
refine IMAGE_READ_MODE_UNCHANGED
xytpai Jun 18, 2026
4b71908
rm dead code & refine comment
xytpai Jun 18, 2026
7ce968f
recover nv path
xytpai Jun 18, 2026
248894c
resolve comments
xytpai Jun 18, 2026
802cac2
apply clang-format
xytpai Jun 18, 2026
d942228
Separate rocJPEG and nvJPEG setup blocks
xytpai Jun 19, 2026
7581393
add _ suffix for private class members
xytpai Jun 19, 2026
a4073b0
just return padded tensor in its original layout
xytpai Jun 19, 2026
a2572c8
rm unnecessary sync
xytpai Jun 22, 2026
b413e54
refine code
xytpai Jun 22, 2026
0fe060a
add rocjpeg doc link
xytpai Jun 22, 2026
2e4047d
refine code
xytpai Jun 23, 2026
156b4ec
Split rocjpeg code (#3)
xytpai Jun 23, 2026
be0b6e5
Merge branch 'main' into xyt/rocjpeg_upstream
xytpai Jun 23, 2026
6620f0f
Merge branch 'main' of github.com:pytorch/vision into xyt/rocjpeg_ups…
NicolasHug Jun 25, 2026
3f2f94f
Let ROCm CI job test the jpeg decoder
NicolasHug Jun 25, 2026
8a34d7c
Merge branch 'main' into testrocmjpeg
NicolasHug Jun 25, 2026
81233bf
Merge branch 'main' into xyt/rocjpeg_upstream
NicolasHug Jun 25, 2026
89f074b
Merge branch 'pytorch:main' into xyt/rocjpeg_upstream
xytpai Jun 26, 2026
6903c5b
add stable abi support
xytpai Jun 27, 2026
004ebb5
add support for make_image_extension
xytpai Jun 27, 2026
f329f79
Merge branch 'testrocmjpeg' of github.com:pytorch/vision into testroc…
NicolasHug Jun 27, 2026
5e2b788
install rocm devel
NicolasHug Jun 27, 2026
e6fa3e4
hmmmmmmmmmmmmmmm
NicolasHug Jun 27, 2026
d8b598f
Skip encoder tests
NicolasHug Jun 27, 2026
67f8a00
simplify setup.py
NicolasHug Jun 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/scripts/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,35 @@ if [[ $GPU_ARCH_TYPE == 'cuda' || $GPU_ARCH_TYPE == 'rocm' ]]; then
fi
echo '::endgroup::'

if [[ $GPU_ARCH_TYPE == 'rocm' ]]; then
  echo '::group::Install rocJPEG SDK'
  # rocJPEG is shipped as a separate SDK package and isn't in the base ROCm
  # builder image. Without its header ($ROCM_HOME/include/rocjpeg/rocjpeg.h)
  # setup.py silently builds the HIP jpeg ops as stubs ("not compiled with
  # nvJPEG support"), so install it before building torchvision.
  #
  # rocjpeg-devel requires libva-devel >= 2.16.0 or libva-amdgpu-devel. The base
  # image's libva-devel is too old and libva-amdgpu-devel lives in AMD's separate
  # "graphics" repo (not the rocm repo), so add that repo first. Derive the ROCm
  # version from the existing rocm repo config, falling back to 7.1.1.
  #
  # The trailing `|| true` matters: when no repo file matches, grep exits
  # non-zero (and `head -1` can SIGPIPE the upstream grep when there are many
  # matches). If errexit/pipefail are enabled, that failure would abort the
  # script before the fallback below is applied.
  rocm_ver=$(
    grep -rhoE 'repo\.radeon\.com/rocm/[^/]+/[0-9][0-9.]*' /etc/yum.repos.d/ \
      | grep -oE '[0-9][0-9.]*$' | head -1 || true
  )
  rocm_ver=${rocm_ver:-7.1.1}
  # NOTE: the heredoc body and terminator stay at column 0 on purpose; leading
  # whitespace would end up in the generated .repo file.
  cat > /etc/yum.repos.d/amdgpu-graphics.repo <<EOF
[amdgpu-graphics]
name=AMD Graphics ${rocm_ver} repository
baseurl=https://repo.radeon.com/graphics/${rocm_ver}/el/8/main/x86_64/
enabled=1
priority=50
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
EOF
  dnf clean all
  # Prefer dnf; fall back to yum on images that only ship the latter.
  dnf install -y libva-amdgpu-devel rocjpeg-devel \
    || yum install -y libva-amdgpu-devel rocjpeg-devel
  echo '::endgroup::'
fi

echo '::group::Install TorchVision'
pip install -e . -v --no-build-isolation
echo '::endgroup::'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
echo '::endgroup::'
echo '::group::Run image tests'
pytest --junit-xml="${RUNNER_TEST_RESULTS_DIR}/test-results.xml" -v --durations=25 test/test_image.py -k "not cuda"
pytest --junit-xml="${RUNNER_TEST_RESULTS_DIR}/test-results.xml" -v --durations=25 test/test_image.py
echo '::endgroup::'
unittests-macos:
Expand Down
47 changes: 35 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1"
USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
USE_ROCJPEG = os.getenv("TORCHVISION_USE_ROCJPEG", "1") == "1"
NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)

TORCHVISION_INCLUDE = os.environ.get("TORCHVISION_INCLUDE", "")
Expand All @@ -45,6 +46,7 @@
print(f"{USE_JPEG = }")
print(f"{USE_WEBP = }")
print(f"{USE_NVJPEG = }")
print(f"{USE_ROCJPEG = }")
print(f"{NVCC_FLAGS = }")
print(f"{TORCHVISION_INCLUDE = }")
print(f"{TORCHVISION_LIBRARY = }")
Expand Down Expand Up @@ -162,10 +164,12 @@ def get_macros_and_flags():
CSRS_DIR / "ops/cpu/nms_kernel.cpp",
CSRS_DIR / "ops/mps/nms_kernel.mm",
CSRS_DIR / "ops/quantized/cpu/qnms_kernel.cpp",
CSRS_DIR / "io/image/cuda/decode_jpegs_cuda.cpp",
CSRS_DIR / "io/image/common_stable.cpp",
}
STABLE_SOURCES.add(CSRS_DIR / ("ops/hip/nms_kernel.hip" if IS_ROCM else "ops/cuda/nms_kernel.cu"))
STABLE_SOURCES.add(
CSRS_DIR / ("io/image/hip/decode_jpegs_cuda.cpp" if IS_ROCM else "io/image/cuda/decode_jpegs_cuda.cpp")
)


def _not_stable(paths):
Expand Down Expand Up @@ -440,18 +444,23 @@ def make_image_extension():
else:
warnings.warn("Building torchvision without WEBP support")

if USE_NVJPEG and (torch.cuda.is_available() or FORCE_CUDA):
nvjpeg_found = CUDA_HOME is not None and (Path(CUDA_HOME) / "include/nvjpeg.h").exists()

if nvjpeg_found:
print("Building torchvision with NVJPEG image support")
libraries.append("nvjpeg")
define_macros += [("NVJPEG_FOUND", 1)]
Extension = CUDAExtension
else:
# NVJPEG is needed here for the GPU JPEG *encoder* (encode_jpegs_cuda.cpp). The
# GPU decoder lives in the stable extension (see make_image_stable_extension).
# rocJPEG is decode-only and ROCm offers no GPU JPEG encoder, so the HIP encoder
# is always a stub and this extension needs nothing from rocJPEG.
if not IS_ROCM:
if USE_NVJPEG and (torch.cuda.is_available() or FORCE_CUDA):
nvjpeg_found = CUDA_HOME is not None and (Path(CUDA_HOME) / "include/nvjpeg.h").exists()

if nvjpeg_found:
print("Building torchvision with NVJPEG image support")
libraries.append("nvjpeg")
define_macros += [("NVJPEG_FOUND", 1)]
Extension = CUDAExtension
else:
warnings.warn("Building torchvision without NVJPEG support")
elif USE_NVJPEG:
warnings.warn("Building torchvision without NVJPEG support")
elif USE_NVJPEG:
warnings.warn("Building torchvision without NVJPEG support")

return Extension(
name="torchvision.image",
Expand Down Expand Up @@ -480,14 +489,28 @@ def make_image_stable_extension():
+ _stable(image_dir.glob("hip/*.cpp" if IS_ROCM else "cuda/*.cpp"))
)

# This extension holds the GPU JPEG *decoder*, which is the only consumer of
# nvJPEG decoding / rocJPEG (rocJPEG is decode-only). IS_ROCM and CUDA are
# mutually exclusive, so these two blocks are independent.
Extension = CppExtension
if USE_NVJPEG and (torch.cuda.is_available() or FORCE_CUDA):
nvjpeg_found = CUDA_HOME is not None and (Path(CUDA_HOME) / "include/nvjpeg.h").exists()
if nvjpeg_found:
print("Building torchvision with NVJPEG image support")
libraries.append("nvjpeg")
define_macros += [("NVJPEG_FOUND", 1)]
Extension = CUDAExtension

if USE_ROCJPEG and IS_ROCM and (torch.cuda.is_available() or FORCE_CUDA):
rocjpeg_found = ROCM_HOME is not None and (Path(ROCM_HOME) / "include/rocjpeg/rocjpeg.h").exists()
if rocjpeg_found:
print("Building torchvision with ROCJPEG image support")
libraries.append("rocjpeg")
define_macros += [("ROCJPEG_FOUND", 1)]
Extension = CUDAExtension
else:
warnings.warn("Building torchvision without ROCJPEG support")

return Extension(
name="torchvision.image_stable",
sources=sorted(str(s) for s in sources),
Expand Down
15 changes: 14 additions & 1 deletion test/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
IS_WINDOWS = sys.platform in ("win32", "cygwin")
IS_MACOS = sys.platform == "darwin"
IS_LINUX = sys.platform == "linux"
# rocJPEG (the ROCm GPU JPEG backend) is decode-only: AMD provides no GPU JPEG
# encoder, so the CUDA encode tests can't pass on ROCm.
IS_ROCM = torch.version.hip is not None
PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split("."))
WEBP_TEST_IMAGES_DIR = os.environ.get("WEBP_TEST_IMAGES_DIR", "")
# See https://github.com/pytorch/vision/pull/8724#issuecomment-2503964558
Expand Down Expand Up @@ -426,12 +429,15 @@ def test_decode_jpegs_cuda(mode, scripted):
futures = [executor.submit(decode_fn, encoded_images, mode, "cuda") for _ in range(num_workers)]
decoded_images_threaded = [future.result() for future in futures]
assert len(decoded_images_threaded) == num_workers
# rocJPEG's color conversion differs slightly from nvJPEG, so it needs a
# looser tolerance against the CPU reference.
tol = 2.5 if torch.version.hip is not None else 2
for decoded_images in decoded_images_threaded:
assert len(decoded_images) == len(encoded_images)
for decoded_image_cuda, decoded_image_cpu in zip(decoded_images, decoded_images_cpu):
assert decoded_image_cuda.shape == decoded_image_cpu.shape
assert decoded_image_cuda.dtype == decoded_image_cpu.dtype == torch.uint8
assert (decoded_image_cuda.cpu().float() - decoded_image_cpu.cpu().float()).abs().mean() < 2
assert (decoded_image_cuda.cpu().float() - decoded_image_cpu.cpu().float()).abs().mean() < tol


@needs_cuda
Expand Down Expand Up @@ -576,6 +582,7 @@ def test_encode_jpeg(img_path, scripted):


@needs_cuda
@pytest.mark.skipif(IS_ROCM, reason="rocJPEG is decode-only; GPU JPEG encoding is not supported on ROCm")
def test_encode_jpeg_cuda_device_param():
path = next(path for path in get_images(IMAGE_ROOT, ".jpg") if "cmyk" not in path)

Expand All @@ -596,6 +603,7 @@ def test_encode_jpeg_cuda_device_param():


@needs_cuda
@pytest.mark.skipif(IS_ROCM, reason="rocJPEG is decode-only; GPU JPEG encoding is not supported on ROCm")
@pytest.mark.parametrize(
"img_path",
[pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path)) for jpeg_path in get_images(IMAGE_ROOT, ".jpg")],
Expand Down Expand Up @@ -625,6 +633,7 @@ def test_encode_jpeg_cuda(img_path, scripted, contiguous):


@needs_cuda
@pytest.mark.skipif(IS_ROCM, reason="rocJPEG is decode-only; GPU JPEG encoding is not supported on ROCm")
def test_encode_jpeg_cuda_sync():
"""
Non-regression test for https://github.com/pytorch/vision/issues/8587.
Expand Down Expand Up @@ -666,6 +675,8 @@ def test_encode_jpeg_cuda_sync():
def test_encode_jpegs_batch(scripted, contiguous, device):
if device == "cpu" and IS_MACOS:
pytest.skip("https://github.com/pytorch/vision/issues/8031")
if device == "cuda" and IS_ROCM:
pytest.skip("rocJPEG is decode-only; GPU JPEG encoding is not supported on ROCm")
decoded_images_tv = []
for jpeg_path in get_images(IMAGE_ROOT, ".jpg"):
if "cmyk" in jpeg_path:
Expand Down Expand Up @@ -711,6 +722,7 @@ def test_encode_jpegs_batch(scripted, contiguous, device):


@needs_cuda
@pytest.mark.skipif(IS_ROCM, reason="rocJPEG is decode-only; GPU JPEG encoding is not supported on ROCm")
def test_single_encode_jpeg_cuda_errors():
with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"):
encode_jpeg(torch.empty((3, 100, 100), dtype=torch.float32, device="cuda"))
Expand All @@ -729,6 +741,7 @@ def test_single_encode_jpeg_cuda_errors():


@needs_cuda
@pytest.mark.skipif(IS_ROCM, reason="rocJPEG is decode-only; GPU JPEG encoding is not supported on ROCm")
def test_batch_encode_jpegs_cuda_errors():
with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"):
encode_jpeg(
Expand Down
9 changes: 7 additions & 2 deletions torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,8 +596,9 @@ std::vector<torch::stable::Tensor> CUDAJpegDecoder::decode_images(
i < output_tensors.size();
++i) {
if (channels[i] == 1) {
output_tensors[i] = torch::stable::clone(torch::stable::unsqueeze(
torch::stable::select(output_tensors[i], 0, 0), 0));
output_tensors[i] = torch::stable::clone(
torch::stable::unsqueeze(
torch::stable::select(output_tensors[i], 0, 0), 0));
}
}
}
Expand All @@ -618,9 +619,13 @@ STABLE_TORCH_LIBRARY_FRAGMENT(image, m) {
"decode_jpegs_cuda(Tensor[] encoded_images, int mode, Device device) -> Tensor[]");
}

// In ROCm builds where rocJPEG is available (ROCJPEG_FOUND is defined to a
// non-zero value by the build), the hand-written rocJPEG implementation
// registers this op, so this registration must be compiled out to avoid
// registering the same op twice.
// Keep this registration for nvJPEG and the no-GPU-JPEG fallback only. An
// undefined ROCJPEG_FOUND evaluates to 0 inside #if, so builds that never
// define the macro keep the registration below.
#if !ROCJPEG_FOUND
STABLE_TORCH_LIBRARY_IMPL(image, CompositeExplicitAutograd, m) {
m.impl("decode_jpegs_cuda", TORCH_BOX(&decode_jpegs_cuda));
}
#endif

} // namespace image
} // namespace vision
Loading
Loading