diff --git a/.github/workflows/ci-riscv64.yml b/.github/workflows/ci-riscv64.yml
new file mode 100644
index 0000000000000..273034f514d79
--- /dev/null
+++ b/.github/workflows/ci-riscv64.yml
@@ -0,0 +1,182 @@
+# Note: this runner is provided externally, so we minimize its access to
+# secrets.
+
+name: CI (riscv64)
+
+on:
+  push:
+    branches: [riscv]
+
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  contents: read
+  # No permissions to secrets.
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  core-ci:
+    name: pytorch-riscv64-core-ci
+    runs-on: [self-hosted, linux, amd64]
+
+    outputs:
+      base_commit: ${{ steps.meta.outputs.base_commit }}
+      head_commit: ${{ steps.meta.outputs.head_commit }}
+      patch_file: ${{ steps.patch.outputs.patch_file }}
+      ci_result_base_url: ${{ steps.jenkins.outputs.ci_result_base_url }}
+      ci_stat_url: ${{ steps.jenkins.outputs.ci_stat_url }}
+
+    # This is in its own separate environment.
+    environment: riscv64
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 3000 # shallow clone
+          ref: ${{ github.sha }} # commit that triggered the workflow
+
+      - name: Extract PR info
+        run: |
+          echo "BASE_SHA=${{ github.event.pull_request.base.sha }}" >> "$GITHUB_ENV"
+          echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> "$GITHUB_ENV"
+
+      - name: Diff base and head
+        id: meta
+        # Event data reaches the script through env instead of being expanded
+        # into the script text: head_ref is chosen by the PR author, and
+        # interpolating it inside `run:` would allow shell injection here.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          BASE_REF: ${{ github.base_ref }}
+          HEAD_REF: ${{ github.head_ref }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          COMMIT_SHA: ${{ github.sha }}
+        run: |
+          if [[ "$EVENT_NAME" == "pull_request" || "$EVENT_NAME" == "pull_request_target" ]]; then
+            echo "Push PR build"
+
+            echo "Base ref: $BASE_REF"
+            echo "Head ref: $HEAD_REF"
+
+            # PRs must target the riscv branch
+            if [ "$BASE_REF" != "riscv" ]; then
+              echo "ERROR: PR must target 'riscv' branch, got '$BASE_REF'"
+              exit 1
+            fi
+
+            # need to get contents of the PR
+            git fetch --quiet origin "pull/${PR_NUMBER}/head:pr-head"
+            git fetch --quiet origin main
+            BASE=$(git merge-base pr-head origin/main)
+            HEAD=$(git rev-parse pr-head)
+          else
+            echo "Push to riscv"
+            # Use the merge-base with origin/main as the baseline
+            git fetch --quiet origin main
+
+            BASE=$(git merge-base "$COMMIT_SHA" origin/main)
+            HEAD="$COMMIT_SHA"
+          fi
+
+          echo "BASE_COMMIT=$BASE" >> "$GITHUB_ENV"
+          echo "HEAD_COMMIT=$HEAD" >> "$GITHUB_ENV"
+
+          echo "base_commit=$BASE" >> "$GITHUB_OUTPUT"
+          echo "head_commit=$HEAD" >> "$GITHUB_OUTPUT"
+
+          echo "Base: $BASE"
+          echo "Head: $HEAD"
+
+      - name: Generate patch
+        id: patch
+        run: |
+          echo "Generating patch..."
+
+          SHORT_HEAD=${HEAD_COMMIT:0:7}
+          PATCH_NAME="patch_${SHORT_HEAD}.patch"
+
+          git diff "$BASE_COMMIT" "$HEAD_COMMIT" > "$PATCH_NAME"
+
+          echo "Patch size:"
+          wc -l "$PATCH_NAME"
+
+          cp "$PATCH_NAME" /home/jenkins/patch/
+          cat "/home/jenkins/patch/$PATCH_NAME"
+
+          echo "PATCH_FILE=$PATCH_NAME" >> "$GITHUB_ENV"
+          echo "patch_file=$PATCH_NAME" >> "$GITHUB_OUTPUT"
+
+      - name: Trigger Jenkins Job
+        id: jenkins
+        env:
+          BASE: ${{ steps.meta.outputs.base_commit }}
+          PATCH: ${{ steps.patch.outputs.patch_file }}
+        run: |
+          set -euo pipefail
+
+          bash /home/jenkins/scripts/jenkins-run.sh "$BASE" "$PATCH" | tee jenkins.log
+
+          # grep exits non-zero when nothing matches; under pipefail that would
+          # abort the step here and skip the explicit error message below.
+          CI_STAT_URL=$(grep -oE 'https://[^ ]+/pytorch-ci-stat\.json' jenkins.log | tail -n1 || true)
+
+          if [[ -z "$CI_STAT_URL" ]]; then
+            echo "ERROR: cannot find pytorch-ci-stat.json URL from Jenkins log"
+            exit 1
+          fi
+
+          CI_RESULT_BASE_URL="${CI_STAT_URL%/pytorch-ci-stat.json}"
+
+          echo "ci_stat_url=$CI_STAT_URL" >> "$GITHUB_OUTPUT"
+          echo "ci_result_base_url=$CI_RESULT_BASE_URL" >> "$GITHUB_OUTPUT"
+
+          echo "CI_STAT_URL=$CI_STAT_URL"
+          echo "CI_RESULT_BASE_URL=$CI_RESULT_BASE_URL"
+
+  full-ci:
+    name: pytorch-riscv64-full-ci
+    runs-on: [self-hosted, linux, amd64]
+    needs: core-ci
+    if: always()
+    continue-on-error: true
+
+    steps:
+      - name: Query existing full test result
+        shell: bash
+        env:
+          BASE_URL: ${{ needs.core-ci.outputs.ci_result_base_url }}
+        run: |
+          set -euo pipefail
+
+          # This job runs even when core-ci failed (if: always()); in that case
+          # the output is empty and there is nothing to query.
+          if [[ -z "$BASE_URL" ]]; then
+            echo "ERROR: core-ci did not publish a result URL"
+            exit 1
+          fi
+
+          STAT_URL="${BASE_URL}/pytorch-ci-stat.json"
+
+          echo "STAT_URL=$STAT_URL"
+
+          curl -fsSL "$STAT_URL" -o pytorch-ci-stat.json
+
+          echo "==== FULL TEST STAT ===="
+          cat pytorch-ci-stat.json
+          echo
+
+          FAILED=$(jq '.failed | length' pytorch-ci-stat.json)
+
+          if [[ "$FAILED" != "0" ]]; then
+            echo "==== FULL TEST FAILED ===="
+            echo "failed cases: $FAILED"
+            exit 1
+          fi
+
+          echo "==== FULL TEST PASSED ===="
+          echo "full test no failures"
+
diff --git a/.gitmodules b/.gitmodules
index 076ce38ac7938..35feb14cec9ae 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -41,7 +41,7 @@
 [submodule "third_party/cpuinfo"]
     ignore = dirty
     path = third_party/cpuinfo
-    url = https://github.com/pytorch/cpuinfo.git
+    url = https://github.com/RuyiAI-Stack/cpuinfo.git
 [submodule "third_party/python-peachpy"]
     ignore = dirty
     path = third_party/python-peachpy
diff --git a/test/inductor/test_cpu_select_algorithm.py b/test/inductor/test_cpu_select_algorithm.py
index 236712f7950bb..8c064db44220e 100644
--- a/test/inductor/test_cpu_select_algorithm.py
+++ b/test/inductor/test_cpu_select_algorithm.py
@@ -1583,6 +1583,7 @@ def forward(self, x):
         vec_amx = VecAMX()
         self._check_amx_counter(vec_amx)
 
+    @unittest.skipIf(not torch._C._has_mkldnn, "MKLDNN is not enabled")
     @inductor_config.patch({"freezing": True})
     @patches
     @torch.no_grad
@@ -1700,6 +1701,7 @@ def forward(self, x, scale):
         vec_amx = VecAMX()
         self._check_amx_counter(vec_amx)
 
+    @unittest.skipIf(not torch._C._has_mkldnn, "MKLDNN is not enabled")
     @inductor_config.patch({"freezing": True, "cpp.enable_concat_linear": True})
     @patches
     @torch.no_grad
diff --git a/test/run_test.py b/test/run_test.py
index 9a85ecd2195cd..22561c879d13d 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -110,6 +110,7 @@ def upload_adhoc_failure_json(*args, **kwargs):
 INDUCTOR_TEST_PREFIX = "inductor"
 IS_SLOW = "slow" in TEST_CONFIG or "slow" in BUILD_ENVIRONMENT
 IS_S390X = platform.machine() == "s390x"
+IS_RISCV64 = platform.machine() == "riscv64"
 
 
 # Note [ROCm parallel CI testing]
@@ -285,6 +286,52 @@ def __contains__(self, item):
     "test_xpu",
 ]
 
+RISCV64_BLOCKLIST = [
+    # disable distributed related tests
+    "inductor/test_distributed_patterns",
+    "fx/test_dce_pass",
+    "export/test_cpp_serdes",
+    "export/test_export",
+    "export/test_export_strict",
+    "export/test_export_training_ir_to_run_decomp",
+    "export/test_retraceability",
+    "export/test_serdes",
+    "export/test_strict_export_v2",
+    "test_public_bindings",
+    "ao/sparsity/test_composability",
+    # QNNPACK is not supported
+    "export/test_converter",
+    # record_context_cpp is not supported on non-Linux, non-x86_64 platforms
+    "torch_np/numpy_tests/core/test_numeric",
+    # Failed to import torch.distributed.run: cannot import name 'Store' from 'torch.distributed'
+    "test_testing",
+    "inductor/test_aot_inductor_arrayref",
+    "inductor/test_cpu_repro",
+    # TODO: mkldnn not available, shape guard failures on RISC-V
+    "inductor/test_cpu_select_algorithm",
+    # TODO: scalar value not equal, need to fix
+    "profiler/test_profiler",
+    # TODO precision
+    "test_binary_ufuncs",
+    "test_decomp",
+    # TODO no CUDA related module
+    "quantization/core/test_workflow_module",  # TestFakeQuantize.test_fq_module_per_channel
+    "quantization/core/test_workflow_ops",
+    "quantization/core/test_quantized_op",
+    # z3-solver build fail
+    "test_proxy_tensor",
+    # too slow on riscv64
+    # 53013.55 s
+    "functorch/test_aotdispatch",
+    # 25069 s
+    "functorch/test_ops",
+    # 17528 s
+    "test_transformers",
+    # 10897 s
+    "functorch/test_vmap",
+]
+
+
 # The tests inside these files should never be run in parallel with each other
 RUN_PARALLEL_BLOCKLIST = [
     "test_extension_utils",
@@ -1875,6 +1922,13 @@ def get_selected_tests(options) -> list[str]:
             selected_tests,
             "Skip distributed tests on s390x",
         )
+    elif IS_RISCV64:
+        selected_tests = exclude_tests(RISCV64_BLOCKLIST, selected_tests, "on riscv64")
+        selected_tests = exclude_tests(
+            DISTRIBUTED_TESTS,
+            selected_tests,
+            "Skip distributed tests on riscv64",
+        )
 
     # skip all distributed tests if distributed package is not available.
     if not dist.is_available():
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 0ac35e077946d..5e7018e1cf9ab 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -9002,6 +9002,7 @@ def test_matrix_exp_backward_input_validation(self, device, dtype):
         with self.assertRaisesRegex(RuntimeError, "must be batches of square matrices"):
             torch.ops.aten.matrix_exp_backward(non_square, grad_non_square)
 
+    @slowTest
     @skipCUDAIfNoMagmaAndNoLinalgsolver
     @skipCPUIfNoLapack
     @dtypes(torch.float, torch.double, torch.complex64, torch.complex128)
diff --git a/test/test_tensor_creation_ops.py b/test/test_tensor_creation_ops.py
index b9e8e537b53cb..99396cea12f5b 100644
--- a/test/test_tensor_creation_ops.py
+++ b/test/test_tensor_creation_ops.py
@@ -32,6 +32,7 @@
     IS_SANDCASTLE,
     IS_S390X,
     IS_ARM64,
+    IS_RISCV64,
     parametrize,
     TEST_WITH_TORCHDYNAMO,
     xfailIfTorchDynamo,
@@ -1109,6 +1110,13 @@ def test_float_to_int_conversion_nonfinite(self, device, dtype):
 
         if dtype == torch.bool:
             refs = (True, True, True)
+        elif IS_RISCV64:
+            if dtype in (torch.int32, torch.int64):
+                refs = (torch.iinfo(dtype).min, torch.iinfo(dtype).max, torch.iinfo(dtype).max)
+            elif dtype == torch.uint8:
+                refs = (0, torch.iinfo(dtype).max, torch.iinfo(dtype).max)
+            elif dtype in (torch.int8, torch.int16):
+                refs = (0, -1, -1)
         elif IS_ARM64:
             refs = (torch.iinfo(dtype).min, torch.iinfo(dtype).max, 0)
             if dtype in (torch.int8, torch.int16):
diff --git a/test/test_torch.py b/test/test_torch.py
index cf95adbc2c916..f93661cf386f4 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -43,7 +43,7 @@
     wrapDeterministicFlagAPITest, DeterministicGuard, CudaSyncGuard,
     bytes_to_scalar, parametrize, noncontiguous_like,
     AlwaysWarnTypedStorageRemoval, TEST_WITH_TORCHDYNAMO, xfailIfTorchDynamo,
-    xfailIfS390X, set_warn_always_context, decorateIf, isRocmArchAnyOf)
+    xfailIfS390X, xfailIfRISCV, set_warn_always_context, decorateIf, isRocmArchAnyOf)
 from multiprocessing.reduction import ForkingPickler
 from torch.testing._internal.common_device_type import (
     expectedFailureMeta,
@@ -9594,14 +9594,21 @@ def test_type(self):
 
     # FIXME: port to a quantization test suite
     @xfailIfS390X
+    @xfailIfRISCV
     def test_qengine(self):
         qengines = torch.backends.quantized.supported_engines
+        if not qengines:
+            self.skipTest("No quantized engines supported on this platform")
         original_qe = torch.backends.quantized.engine
         for qe in qengines:
             torch.backends.quantized.engine = qe
             if torch.backends.quantized.engine != qe:
                 raise AssertionError(f"qengine not set successfully: expected {qe}, got {torch.backends.quantized.engine}")
-        torch.backends.quantized.engine = original_qe
+        # On platforms where no qengine is compiled in as the default (e.g. RISC-V),
+        # the initial engine reads as "none" (NoQEngine), which is not a valid value
+        # to pass back to _set_qengine. Only restore if it was a real engine.
+        if original_qe != "none":
+            torch.backends.quantized.engine = original_qe
 
     def test_terminate_handler_on_crash(self):
         cmd = [sys.executable, '-c', "import os; os.environ[\"TORCH_CUSTOM_TERMINATE\"] ='1'; \
diff --git a/third_party/kineto b/third_party/kineto
index 5902263f641a8..7739225509b84 160000
--- a/third_party/kineto
+++ b/third_party/kineto
@@ -1 +1 @@
-Subproject commit 5902263f641a8634af077031fb4befc7162cefbb
+Subproject commit 7739225509b847e7b1ce7638f1ead383d15b077f
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index 608c8ee4b84b3..099382a195ed0 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -1438,11 +1438,13 @@ def printErrors(self) -> None:
 IS_PPC = platform.machine() == "ppc64le"
 IS_X86 = platform.machine() in ('x86_64', 'i386')
 IS_ARM64 = platform.machine() in ('arm64', 'aarch64', 'ARM64')
+IS_RISCV64 = platform.machine() == 'riscv64'
 IS_S390X = platform.machine() == "s390x"
 IS_AVX512_VNNI_SUPPORTED = torch.cpu.get_capabilities().get("avx512_vnni", False)
 IS_CPU_EXT_SVE_SUPPORTED = torch.cpu.get_capabilities().get("sve", False)
 IS_CPU_CAPABILITY_SVE = torch._C._get_cpu_capability() in ("SVE128", "SVE256")
 IS_CPU_CAPABILITY_SVE256 = torch._C._get_cpu_capability() == "SVE256"
+IS_RISCV = platform.machine() in ('riscv64', 'riscv32')
 
 if IS_WINDOWS:
     @contextmanager
@@ -2265,6 +2267,9 @@ def wrap_fn(self, *args, **kwargs):
 def xfailIfS390X(func):
     return unittest.expectedFailure(func) if IS_S390X else func
 
+def xfailIfRISCV(func):
+    return unittest.expectedFailure(func) if IS_RISCV else func
+
 def xfailIf(condition):
     def wrapper(func):
         if condition:
@@ -5668,27 +5673,22 @@ def check_bytes(byte_list):
             if not (0 <= byte <= 255):
                 raise AssertionError(f"byte value out of range: expected 0 <= byte <= 255, got {byte}")
 
-    if dtype.is_complex:
-        if len(byte_list) != (num_bytes * 2):
-            raise AssertionError(
-                f"expected len(byte_list) == {num_bytes * 2} for complex dtype, got {len(byte_list)}"
-            )
-        check_bytes(byte_list)
-        real = ctype.from_buffer((ctypes.c_byte * num_bytes)(
-            *byte_list[:num_bytes])).value
-        imag = ctype.from_buffer((ctypes.c_byte * num_bytes)(
-            *byte_list[num_bytes:])).value
-        res = real + 1j * imag
-    else:
-        if len(byte_list) != num_bytes:
-            raise AssertionError(
-                f"expected len(byte_list) == {num_bytes}, got {len(byte_list)}"
-            )
-        check_bytes(byte_list)
-        res = ctype.from_buffer((ctypes.c_byte * num_bytes)(
-            *byte_list)).value
-
-    return torch.tensor(res, device=device, dtype=dtype)
+    expected_len = num_bytes * 2 if dtype.is_complex else num_bytes
+    if len(byte_list) != expected_len:
+        raise AssertionError(
+            f"expected len(byte_list) == {expected_len}"
+            f"{' for complex dtype' if dtype.is_complex else ''}, got {len(byte_list)}"
+        )
+    check_bytes(byte_list)
+
+    # Write bytes directly into storage to preserve exact bit patterns
+    # (e.g. NaN payloads, which are not preserved when round-tripping through
+    # Python float/complex, especially on architectures like RISC-V that
+    # canonicalize NaNs).
+    res = torch.empty((), dtype=dtype, device=device)
+    src = torch.tensor(byte_list, dtype=torch.uint8, device=device)
+    res.untyped_storage().copy_(src.untyped_storage())
+    return res
 
 
 def copy_func(f):