Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions .github/workflows/ci-riscv64.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Note: this runner is provided externally, so we minimize its access to
# secrets.

name: CI (riscv64)

on:
  # Builds of the riscv integration branch itself.
  push:
    branches: [riscv]

  # Builds for pull requests. Values taken from the pull request (branch
  # names, titles, ...) are untrusted and must never be interpolated directly
  # into shell scripts.
  pull_request_target:
    types: [opened, synchronize, reopened]

# Restrict the automatically issued GITHUB_TOKEN to read-only repository
# contents. This key scopes the token only; it does not by itself control
# which repository or environment secrets a job can read.
permissions:
  contents: read

# One active run per pull request. The group is keyed on the PR number rather
# than the head branch name, so that unrelated PRs whose head branches happen
# to share a name do not cancel each other. Push builds fall back to the
# unique run id and are therefore never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

jobs:
core-ci:
  name: pytorch-riscv64-core-ci
  runs-on: [self-hosted, linux, amd64]

  # Values published for downstream jobs (read through `needs.core-ci.outputs`).
  outputs:
    base_commit: ${{ steps.meta.outputs.base_commit }}
    head_commit: ${{ steps.meta.outputs.head_commit }}
    patch_file: ${{ steps.patch.outputs.patch_file }}
    ci_result_base_url: ${{ steps.jenkins.outputs.ci_result_base_url }}
    ci_stat_url: ${{ steps.jenkins.outputs.ci_stat_url }}

  # This is in its own separate environment.
  environment: riscv64
  steps:
    - name: Checkout repo
      uses: actions/checkout@v4
      with:
        # Shallow clone; presumably deep enough for the merge-base lookups
        # below to succeed.
        fetch-depth: 3000
        ref: ${{ github.sha }}  # including latest sha

    # Export the PR base/head SHAs to later steps. Both are empty on push
    # events, where there is no pull_request payload.
    - name: Extract PR info
      env:
        PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
      run: |
        echo "BASE_SHA=$PR_BASE_SHA" >> "$GITHUB_ENV"
        echo "HEAD_SHA=$PR_HEAD_SHA" >> "$GITHUB_ENV"

    # Work out the two commits to diff: the merge-base with origin/main and
    # the commit under test (PR head, or the pushed commit).
    - name: Diff base and head
      id: meta
      # Context values are handed to the script through the environment
      # instead of being expanded into the script text: branch names are
      # controlled by the PR author and would otherwise allow shell
      # injection on this runner.
      env:
        EVENT_NAME: ${{ github.event_name }}
        BASE_REF: ${{ github.base_ref }}
        HEAD_REF: ${{ github.head_ref }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        COMMIT_SHA: ${{ github.sha }}
      run: |
        if [[ "$EVENT_NAME" == "pull_request" || "$EVENT_NAME" == "pull_request_target" ]]; then
          echo "Push PR build"

          echo "Base ref: $BASE_REF"
          echo "Head ref: $HEAD_REF"

          # PRs must be based on the riscv branch.
          if [ "$BASE_REF" != "riscv" ]; then
            echo "ERROR: PR must target 'riscv' branch, got '$BASE_REF'"
            exit 1
          fi

          # Fetch the contents of the PR into a local branch.
          git fetch --quiet origin "pull/${PR_NUMBER}/head:pr-head"
          git fetch --quiet origin main
          BASE=$(git merge-base pr-head origin/main)
          HEAD=$(git rev-parse pr-head)
        else
          echo "Push to riscv"
          # Use one common baseline for every build. NOTE(review): the
          # original note names `riscv` as the baseline, but the merge-base
          # is computed against origin/main -- confirm which is intended.
          git fetch --quiet origin main
          #git fetch origin riscv

          BASE=$(git merge-base "$COMMIT_SHA" origin/main)
          HEAD="$COMMIT_SHA"
        fi

        echo "BASE_COMMIT=$BASE" >> "$GITHUB_ENV"
        echo "HEAD_COMMIT=$HEAD" >> "$GITHUB_ENV"

        echo "base_commit=$BASE" >> "$GITHUB_OUTPUT"
        echo "head_commit=$HEAD" >> "$GITHUB_OUTPUT"

        echo "Base: $BASE"
        echo "Head: $HEAD"

    # Write the base..head diff to a patch file named after the short head
    # SHA and copy it to the directory on the runner used by the Jenkins step.
    - name: Generate patch
      id: patch
      run: |
        echo "Generating patch..."

        SHORT_HEAD=${HEAD_COMMIT:0:7}
        PATCH_NAME="patch_${SHORT_HEAD}.patch"

        git diff "$BASE_COMMIT" "$HEAD_COMMIT" > "$PATCH_NAME"

        echo "Patch size:"
        wc -l "$PATCH_NAME"

        cp "$PATCH_NAME" /home/jenkins/patch/
        cat "/home/jenkins/patch/$PATCH_NAME"

        echo "PATCH_FILE=$PATCH_NAME" >> "$GITHUB_ENV"
        echo "patch_file=$PATCH_NAME" >> "$GITHUB_OUTPUT"

    # Run the Jenkins driver script and extract the result URLs from its log.
    - name: Trigger Jenkins Job
      id: jenkins
      run: |
        set -euo pipefail

        BASE="${{ steps.meta.outputs.base_commit }}"
        PATCH="${{ steps.patch.outputs.patch_file }}"

        bash /home/jenkins/scripts/jenkins-run.sh "$BASE" "$PATCH" | tee jenkins.log

        # `|| true`: with errexit and pipefail enabled, a grep that matches
        # nothing would abort the step right here, before the explicit
        # error message below could be printed.
        CI_STAT_URL=$(grep -oE 'https://[^ ]+/pytorch-ci-stat\.json' jenkins.log | tail -n1 || true)

        if [[ -z "$CI_STAT_URL" ]]; then
          echo "ERROR: cannot find pytorch-ci-stat.json URL from Jenkins log"
          exit 1
        fi

        # Strip the file name to obtain the directory holding the results.
        CI_RESULT_BASE_URL="${CI_STAT_URL%/pytorch-ci-stat.json}"

        echo "ci_stat_url=$CI_STAT_URL" >> "$GITHUB_OUTPUT"
        echo "ci_result_base_url=$CI_RESULT_BASE_URL" >> "$GITHUB_OUTPUT"

        echo "CI_STAT_URL=$CI_STAT_URL"
        echo "CI_RESULT_BASE_URL=$CI_RESULT_BASE_URL"

full-ci:
  name: pytorch-riscv64-full-ci
  runs-on: [self-hosted, linux, amd64]
  needs: core-ci
  # Run regardless of the outcome of core-ci ...
  if: always()
  # ... and do not let a failure of this job fail the workflow run.
  continue-on-error: true

  steps:
    # Download the statistics file published by core-ci and fail when it
    # lists any failed cases.
    - name: Query existing full test result
      shell: bash
      # Passed through the environment rather than expanded into the script
      # text, because the value originates from a scraped log line.
      env:
        BASE_URL: ${{ needs.core-ci.outputs.ci_result_base_url }}
      run: |
        set -euo pipefail

        # Because of `if: always()` this job also runs when core-ci failed
        # before publishing its outputs; report that clearly instead of
        # letting curl fail on a URL with no host.
        if [[ -z "$BASE_URL" ]]; then
          echo "ERROR: core-ci did not publish ci_result_base_url; no full test result to query"
          exit 1
        fi

        STAT_URL="${BASE_URL}/pytorch-ci-stat.json"

        echo "STAT_URL=$STAT_URL"

        curl -fsSL "$STAT_URL" -o pytorch-ci-stat.json

        echo "==== FULL TEST STAT ===="
        cat pytorch-ci-stat.json
        echo

        # Number of entries under the top-level `failed` key.
        FAILED=$(jq '.failed | length' pytorch-ci-stat.json)

        if [[ "$FAILED" != "0" ]]; then
          echo "==== FULL TEST FAILED ===="
          echo "failed cases: $FAILED"
          exit 1
        fi

        echo "==== FULL TEST PASSED ===="
        echo "full test no failures"

2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
[submodule "third_party/cpuinfo"]
ignore = dirty
path = third_party/cpuinfo
url = https://github.com/pytorch/cpuinfo.git
url = https://github.com/RuyiAI-Stack/cpuinfo.git
[submodule "third_party/python-peachpy"]
ignore = dirty
path = third_party/python-peachpy
Expand Down
2 changes: 2 additions & 0 deletions test/inductor/test_cpu_select_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,6 +1583,7 @@ def forward(self, x):
vec_amx = VecAMX()
self._check_amx_counter(vec_amx)

@unittest.skipIf(not torch._C._has_mkldnn, "MKLDNN is not enabled")
@inductor_config.patch({"freezing": True})
@patches
@torch.no_grad
Expand Down Expand Up @@ -1700,6 +1701,7 @@ def forward(self, x, scale):
vec_amx = VecAMX()
self._check_amx_counter(vec_amx)

@unittest.skipIf(not torch._C._has_mkldnn, "MKLDNN is not enabled")
@inductor_config.patch({"freezing": True, "cpp.enable_concat_linear": True})
@patches
@torch.no_grad
Expand Down
54 changes: 54 additions & 0 deletions test/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def upload_adhoc_failure_json(*args, **kwargs):
INDUCTOR_TEST_PREFIX = "inductor"
IS_SLOW = "slow" in TEST_CONFIG or "slow" in BUILD_ENVIRONMENT
IS_S390X = platform.machine() == "s390x"
IS_RISCV64 = platform.machine() == "riscv64"


# Note [ROCm parallel CI testing]
Expand Down Expand Up @@ -285,6 +286,52 @@ def __contains__(self, item):
"test_xpu",
]

# Test files that are excluded from the selected tests when running on a
# riscv64 machine (see the IS_RISCV64 branch in get_selected_tests).
RISCV64_BLOCKLIST = [
# Disable distributed-related tests
"inductor/test_distributed_patterns",
"fx/test_dce_pass",
"export/test_cpp_serdes",
"export/test_export",
"export/test_export_strict",
"export/test_export_training_ir_to_run_decomp",
"export/test_retraceability",
"export/test_serdes",
"export/test_strict_export_v2",
"test_public_bindings",
"ao/sparsity/test_composability",
# QNNPACK is not supported
"export/test_converter",
# record_context_cpp is not supported on non-Linux / non-x86_64 platforms
"torch_np/numpy_tests/core/test_numeric",
# Failed to import torch.distributed.run: cannot import name 'Store' from 'torch.distributed'
"test_testing",
"inductor/test_aot_inductor_arrayref",
"inductor/test_cpu_repro",
# TODO: mkldnn is not available; shape guard failures on RISC-V
"inductor/test_cpu_select_algorithm",
# TODO: scalar values are not equal, needs a fix
"profiler/test_profiler",
# TODO: precision issues
"test_binary_ufuncs",
"test_decomp",
# TODO: no CUDA-related module
"quantization/core/test_workflow_module", # TestFakeQuantize.test_fq_module_per_channel
"quantization/core/test_workflow_ops",
"quantization/core/test_quantized_op",
# z3-solver fails to build
"test_proxy_tensor",
# Too slow on riscv64 (measured run time noted above each entry)
# 53013.55 s
"functorch/test_aotdispatch",
# 25069 s
"functorch/test_ops",
# 17528 s
"test_transformers",
# 10897 s
"functorch/test_vmap",
]


# The tests inside these files should never be run in parallel with each other
RUN_PARALLEL_BLOCKLIST = [
"test_extension_utils",
Expand Down Expand Up @@ -1875,6 +1922,13 @@ def get_selected_tests(options) -> list[str]:
selected_tests,
"Skip distributed tests on s390x",
)
elif IS_RISCV64:
selected_tests = exclude_tests(RISCV64_BLOCKLIST, selected_tests, "on riscv64")
selected_tests = exclude_tests(
DISTRIBUTED_TESTS,
selected_tests,
"Skip distributed tests on riscv64",
)

# skip all distributed tests if distributed package is not available.
if not dist.is_available():
Expand Down
1 change: 1 addition & 0 deletions test/test_linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9002,6 +9002,7 @@ def test_matrix_exp_backward_input_validation(self, device, dtype):
with self.assertRaisesRegex(RuntimeError, "must be batches of square matrices"):
torch.ops.aten.matrix_exp_backward(non_square, grad_non_square)

@slowTest
@skipCUDAIfNoMagmaAndNoLinalgsolver
@skipCPUIfNoLapack
@dtypes(torch.float, torch.double, torch.complex64, torch.complex128)
Expand Down
8 changes: 8 additions & 0 deletions test/test_tensor_creation_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
IS_SANDCASTLE,
IS_S390X,
IS_ARM64,
IS_RISCV64,
parametrize,
TEST_WITH_TORCHDYNAMO,
xfailIfTorchDynamo,
Expand Down Expand Up @@ -1109,6 +1110,13 @@ def test_float_to_int_conversion_nonfinite(self, device, dtype):

if dtype == torch.bool:
refs = (True, True, True)
elif IS_RISCV64:
if dtype in (torch.int32, torch.int64):
refs = (torch.iinfo(dtype).min, torch.iinfo(dtype).max, torch.iinfo(dtype).max)
elif dtype == torch.uint8:
refs = (0, torch.iinfo(dtype).max, torch.iinfo(dtype).max)
elif dtype in (torch.int8, torch.int16):
refs = (0, -1, -1)
elif IS_ARM64:
refs = (torch.iinfo(dtype).min, torch.iinfo(dtype).max, 0)
if dtype in (torch.int8, torch.int16):
Expand Down
11 changes: 9 additions & 2 deletions test/test_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
wrapDeterministicFlagAPITest, DeterministicGuard, CudaSyncGuard,
bytes_to_scalar, parametrize, noncontiguous_like,
AlwaysWarnTypedStorageRemoval, TEST_WITH_TORCHDYNAMO, xfailIfTorchDynamo,
xfailIfS390X, set_warn_always_context, decorateIf, isRocmArchAnyOf)
xfailIfS390X, xfailIfRISCV, set_warn_always_context, decorateIf, isRocmArchAnyOf)
from multiprocessing.reduction import ForkingPickler
from torch.testing._internal.common_device_type import (
expectedFailureMeta,
Expand Down Expand Up @@ -9594,14 +9594,21 @@ def test_type(self):

# FIXME: port to a quantization test suite
@xfailIfS390X
@xfailIfRISCV
def test_qengine(self):
    """Check that every supported quantized engine can be selected.

    Each entry of ``torch.backends.quantized.supported_engines`` is assigned
    to ``torch.backends.quantized.engine`` and read back. The test is skipped
    when the list of supported engines is empty.

    Raises:
        AssertionError: if the engine read back differs from the one set.
    """
    qengines = torch.backends.quantized.supported_engines
    if not qengines:
        self.skipTest("No quantized engines supported on this platform")
    original_qe = torch.backends.quantized.engine
    try:
        for qe in qengines:
            torch.backends.quantized.engine = qe
            if torch.backends.quantized.engine != qe:
                raise AssertionError(f"qengine not set successfully: expected {qe}, got {torch.backends.quantized.engine}")
    finally:
        # Restore the engine even when the check above fails, so the
        # global setting does not leak into other tests.
        # On platforms where no qengine is compiled in as the default (e.g. RISC-V),
        # the initial engine reads as "none" (NoQEngine), which is not a valid value
        # to pass back to _set_qengine. Only restore if it was a real engine.
        if original_qe != "none":
            torch.backends.quantized.engine = original_qe

def test_terminate_handler_on_crash(self):
cmd = [sys.executable, '-c', "import os; os.environ[\"TORCH_CUSTOM_TERMINATE\"] ='1'; \
Expand Down
2 changes: 1 addition & 1 deletion third_party/kineto
Submodule kineto updated 55 files
+9 −0 .github/scripts/config_cpu.sh
+15 −0 .github/scripts/config_cuda.sh
+12 −0 .github/scripts/config_rocm.sh
+9 −0 .github/scripts/config_xpu.sh
+3 −7 .github/scripts/setup.sh
+0 −3 .github/workflows/linux_cpu_kineto.yml
+0 −3 .github/workflows/linux_cpu_pytorch.yml
+0 −3 .github/workflows/linux_cuda_kineto.yml
+0 −3 .github/workflows/linux_cuda_pytorch.yml
+0 −3 .github/workflows/mac_cpu.yml
+0 −3 .gitmodules
+1 −1 benchmarks/CMakeLists.txt
+41 −13 libkineto/CMakeLists.txt
+8 −3 libkineto/include/AbstractConfig.h
+22 −2 libkineto/include/Config.h
+4 −0 libkineto/include/IActivityProfiler.h
+4 −1 libkineto/src/AbstractConfig.cpp
+1 −1 libkineto/src/ApproximateClock.cpp
+36 −1 libkineto/src/Config.cpp
+86 −2 libkineto/src/ConfigLoader.cpp
+6 −0 libkineto/src/ConfigLoader.h
+1 −1 libkineto/src/CuptiActivity.h
+16 −18 libkineto/src/CuptiActivityApi.cpp
+5 −4 libkineto/src/CuptiActivityProfiler.cpp
+2 −2 libkineto/src/CuptiCallbackApi.cpp
+39 −10 libkineto/src/CuptiCbidRegistry.cpp
+12 −0 libkineto/src/CuptiCbidRegistry.h
+3 −3 libkineto/src/CuptiRangeProfiler.cpp
+4 −0 libkineto/src/CuptiRangeProfilerConfig.h
+12 −12 libkineto/src/GenericActivityProfiler.cpp
+9 −13 libkineto/src/GenericActivityProfiler.h
+3 −15 libkineto/src/Logger.h
+0 −2 libkineto/src/RocmActivityProfiler.h
+0 −131 libkineto/src/RocmStreamQueue.h
+0 −14 libkineto/src/RocprofActivityApi.cpp
+4 −4 libkineto/src/RocprofLogger.cpp
+0 −24 libkineto/src/RoctracerActivityApi.cpp
+5 −4 libkineto/src/RoctracerLogger.cpp
+453 −33 libkineto/src/cupti_strings.cpp
+1 −4 libkineto/src/cupti_strings.h
+1 −1 libkineto/src/output_csv.cpp
+5 −5 libkineto/src/output_json.cpp
+12 −9 libkineto/src/plugin/xpupti/XpuptiActivityProfilerSession.cpp
+4 −0 libkineto/src/plugin/xpupti/XpuptiScopeProfilerConfig.h
+6 −0 libkineto/test/ApproximateClockTest.cpp
+1 −1 libkineto/test/CMakeLists.txt
+52 −0 libkineto/test/ConfigTest.cpp
+1 −1 libkineto/test/CuptiRangeProfilerConfigTest.cpp
+15 −49 libkineto/test/CuptiStringsTest.cpp
+0 −123 libkineto/test/LoggerObserverTest.cpp
+11 −189 libkineto/test/RocmActivityProfilerTest.cpp
+1 −1 libkineto/test/xpupti/CMakeLists.txt
+1 −1 libkineto/test/xpupti/XpuptiScopeProfilerConfigTest.cpp
+1 −1 libkineto/test/xpupti/compute/CMakeLists.txt
+0 −1 libkineto/third_party/json
Loading