From 1428de9bc95330fde7f4ffa83a6aee9ac580f4ae Mon Sep 17 00:00:00 2001
From: Ihor Solodrai <ihor.solodrai@linux.dev>
Date: Mon, 23 Feb 2026 15:25:48 -0800
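Subject: [PATCH 1/2] scripts: Factor out install-github-cli.sh

Move the GitHub CLI (gh) installation steps out of
download-gh-release.sh into a standalone install-github-cli.sh script.
download-gh-release.sh now resolves its own directory and runs the new
script from there before querying releases with gh.

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>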
---
 .github/scripts/download-gh-release.sh | 15 +++------------
 .github/scripts/install-github-cli.sh  | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 12 deletions(-)
 create mode 100755 .github/scripts/install-github-cli.sh

diff --git a/.github/scripts/download-gh-release.sh b/.github/scripts/download-gh-release.sh
index 291eed83..9e528ab2 100755
--- a/.github/scripts/download-gh-release.sh
+++ b/.github/scripts/download-gh-release.sh
@@ -2,23 +2,14 @@
 
 set -euo pipefail
 
+SCRIPT_DIR=$(dirname "$(realpath "$0")")
+
 GH_REPO=$1
 INSTALL_DIR=$(realpath $2)
 
 cd /tmp
 
-if ! command -v gh &> /dev/null; then
-    # https://github.com/cli/cli/blob/trunk/docs/install_linux.md
-    (type -p wget >/dev/null || (sudo apt update && sudo apt install wget -y)) \
-	&& sudo mkdir -p -m 755 /etc/apt/keyrings \
-	&& out=$(mktemp) && wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg \
-	&& cat $out | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
-	&& sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
-	&& sudo mkdir -p -m 755 /etc/apt/sources.list.d \
-	&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
-	&& sudo apt update \
-	&& sudo apt install gh -y
-fi
+bash "$SCRIPT_DIR/install-github-cli.sh"
 
 tag=$(gh release list -L 1 -R ${GH_REPO} --json tagName -q .[].tagName)
 if [[ -z "$tag" ]]; then
diff --git a/.github/scripts/install-github-cli.sh b/.github/scripts/install-github-cli.sh
new file mode 100755
index 00000000..6008d88f
--- /dev/null
+++ b/.github/scripts/install-github-cli.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -euo pipefail
+
+if ! command -v gh &> /dev/null; then
+    # https://github.com/cli/cli/blob/trunk/docs/install_linux.md
+    (type -p wget >/dev/null || (sudo apt update && sudo apt install wget -y)) \
+	&& sudo mkdir -p -m 755 /etc/apt/keyrings \
+	&& out=$(mktemp) && wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+	&& cat $out | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
+	&& sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
+	&& sudo mkdir -p -m 755 /etc/apt/sources.list.d \
+	&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
+	&& sudo apt update \
+	&& sudo apt install gh -y
+fi

From e28f29dde69cb6ccc39f0c89709126cb46c524da Mon Sep 17 00:00:00 2001
From: Ihor Solodrai <ihor.solodrai@linux.dev>
Date: Mon, 23 Feb 2026 17:01:54 -0800
Subject: [PATCH 2/2] ci/diffs: Update temporary CI patches

Remove all previous temporary diffs.

Add a patch for tailcalls on s390x:
https://lore.kernel.org/bpf/20260217161058.101346-1-iii@linux.ibm.com/

Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
---
 ...f-work-around-latest-Clang-smartness.patch |  31 --
 ...g_parsing-Ensure-data-is-flushed-to-.patch |  33 --
 ...f-make-arg_parsing.c-more-robust-to-.patch |  56 ---
 ...20260127-Fix-static-build-of-bpftool.patch |  28 --
 ..._ext-Fix-init_enable_count-flakiness.patch | 105 ------
 ...-Prevent-live-lock-on-task-to-CPU-mo.patch | 300 ---------------
 ...-Protect-transition-on-weakly-ordere.patch | 355 ------------------
 ...-Drop-per-CPU-CID-immediately-when-s.patch |  49 ---
 ...-Optimize-transitional-CIDs-when-sch.patch |  80 ----
 ...-Rename-do_filp_open-to-do_file_open.patch |  55 ---
 ...t-assume-CID-is-CPU-owned-on-mode-sw.patch |  83 ----
 ...increment-tailcall-count-when-prog-i.patch |  66 ++++
 12 files changed, 66 insertions(+), 1175 deletions(-)
 delete mode 100644 ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch
 delete mode 100644 ci/diffs/20251014-selftests-arg_parsing-Ensure-data-is-flushed-to-.patch
 delete mode 100644 ci/diffs/20251014-selftests-bpf-make-arg_parsing.c-more-robust-to-.patch
 delete mode 100644 ci/diffs/20260127-Fix-static-build-of-bpftool.patch
 delete mode 100644 ci/diffs/20260202-selftests-sched_ext-Fix-init_enable_count-flakiness.patch
 delete mode 100644 ci/diffs/202602021-sched-mmcid-Prevent-live-lock-on-task-to-CPU-mo.patch
 delete mode 100644 ci/diffs/202602022-sched-mmcid-Protect-transition-on-weakly-ordere.patch
 delete mode 100644 ci/diffs/202602023-sched-mmcid-Drop-per-CPU-CID-immediately-when-s.patch
 delete mode 100644 ci/diffs/202602024-sched-mmcid-Optimize-transitional-CIDs-when-sch.patch
 delete mode 100644 ci/diffs/20260210-selftests-bpf-Rename-do_filp_open-to-do_file_open.patch
 delete mode 100644 ci/diffs/20260212-sched-mmcid-Don-t-assume-CID-is-CPU-owned-on-mode-sw.patch
 create mode 100644 ci/diffs/20260223-s390-bpf-Do-not-increment-tailcall-count-when-prog-i.patch

diff --git a/ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch b/ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch
deleted file mode 100644
index ec1e29a8..00000000
--- a/ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From d31a7125891994681503770cff46a119692fb2b9 Mon Sep 17 00:00:00 2001
-From: Andrii Nakryiko <andrii@kernel.org>
-Date: Mon, 11 Dec 2023 17:09:38 -0800
-Subject: [PATCH 1/1] selftests/bpf: work around latest Clang smartness
-
-Work around the issue while we deal with it in the Clang itself.
-See [0].
-
-  [0] https://github.com/llvm/llvm-project/pull/73662#issuecomment-1849281758
-
-Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
----
- tools/testing/selftests/bpf/progs/iters.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
-index 3aca3dc145b5..929ba6fa2105 100644
---- a/tools/testing/selftests/bpf/progs/iters.c
-+++ b/tools/testing/selftests/bpf/progs/iters.c
-@@ -1420,7 +1420,7 @@ SEC("raw_tp")
- __success
- int iter_arr_with_actual_elem_count(const void *ctx)
- {
--	int i, n = loop_data.n, sum = 0;
-+	unsigned i, n = loop_data.n, sum = 0;
- 
- 	if (n > ARRAY_SIZE(loop_data.data))
- 		return 0;
--- 
-2.34.1
-
diff --git a/ci/diffs/20251014-selftests-arg_parsing-Ensure-data-is-flushed-to-.patch b/ci/diffs/20251014-selftests-arg_parsing-Ensure-data-is-flushed-to-.patch
deleted file mode 100644
index efcdbeed..00000000
--- a/ci/diffs/20251014-selftests-arg_parsing-Ensure-data-is-flushed-to-.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 423112d2e9b591999efa4ad74000f8f6f3f381ea Mon Sep 17 00:00:00 2001
-From: Xing Guo <higuoxing@gmail.com>
-Date: Tue, 14 Oct 2025 16:03:23 +0800
-Subject: [PATCH 20251015/20251015] selftests: arg_parsing: Ensure data is
- flushed to disk before reading.
-
-Recently, I noticed a selftest failure in my local environment. The
-test_parse_test_list_file writes some data to
-/tmp/bpf_arg_parsing_test.XXXXXX and parse_test_list_file() will read
-the data back.  However, after writing data to that file, we forget to
-call fsync() and it's causing testing failure in my laptop.  This patch
-helps fix it by adding the missing fsync() call.
-
-Signed-off-by: Xing Guo <higuoxing@gmail.com>
----
- tools/testing/selftests/bpf/prog_tests/arg_parsing.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
-index fbf0d9c2f58b..d9fcbfb72aaa 100644
---- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
-+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
-@@ -140,6 +140,7 @@ static void test_parse_test_list_file(void)
- 	fprintf(fp, "testA/subtest2\n");
- 	fprintf(fp, "testC_no_eof_newline");
- 	fflush(fp);
-+	fsync(fd);
- 
- 	if (!ASSERT_OK(ferror(fp), "prepare tmp"))
- 		goto out_fclose;
--- 
-2.51.0
-
diff --git a/ci/diffs/20251014-selftests-bpf-make-arg_parsing.c-more-robust-to-.patch b/ci/diffs/20251014-selftests-bpf-make-arg_parsing.c-more-robust-to-.patch
deleted file mode 100644
index 34104b3f..00000000
--- a/ci/diffs/20251014-selftests-bpf-make-arg_parsing.c-more-robust-to-.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 8a03969566c5447aa72469e8f09b8158e3dad8f9 Mon Sep 17 00:00:00 2001
-From: Andrii Nakryiko <andrii@kernel.org>
-Date: Tue, 14 Oct 2025 13:20:37 -0700
-Subject: [PATCH 20251014/20251015] selftests/bpf: make arg_parsing.c more
- robust to crashes
-
-We started getting a crash in BPF CI, which seems to originate from
-test_parse_test_list_file() test and is happening at this line:
-
-  ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
-
-One way we can crash there is if set.cnt zero, which is checked for with
-ASSERT_EQ() above, but we proceed after this regardless of the outcome.
-Instead of crashing, we should bail out with test failure early.
-
-Similarly, if parse_test_list_file() fails, we shouldn't be even looking
-at set, so bail even earlier if ASSERT_OK() fails.
-
-Fixes: 64276f01dce8 ("selftests/bpf: Test_progs can read test lists from file")
-Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
----
- tools/testing/selftests/bpf/prog_tests/arg_parsing.c | 9 ++++++---
- 1 file changed, 6 insertions(+), 3 deletions(-)
-
-diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
-index bb143de68875..fbf0d9c2f58b 100644
---- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
-+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
-@@ -146,9 +146,12 @@ static void test_parse_test_list_file(void)
- 
- 	init_test_filter_set(&set);
- 
--	ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
-+	if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
-+		goto out_fclose;
-+
-+	if (!ASSERT_EQ(set.cnt, 4, "test  count"))
-+		goto out_free_set;
- 
--	ASSERT_EQ(set.cnt, 4, "test  count");
- 	ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
- 	ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
- 	ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
-@@ -158,8 +161,8 @@ static void test_parse_test_list_file(void)
- 	ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
- 	ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
- 
-+out_free_set:
- 	free_test_filter_set(&set);
--
- out_fclose:
- 	fclose(fp);
- out_remove:
--- 
-2.51.0
-
diff --git a/ci/diffs/20260127-Fix-static-build-of-bpftool.patch b/ci/diffs/20260127-Fix-static-build-of-bpftool.patch
deleted file mode 100644
index 3f3fa8f9..00000000
--- a/ci/diffs/20260127-Fix-static-build-of-bpftool.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 2a4dffd83945d770fbfbef022f25c3c9ce71b6a5 Mon Sep 17 00:00:00 2001
-From: Ihor Solodrai <ihor.solodrai@linux.dev>
-Date: Tue, 27 Jan 2026 12:27:30 -0800
-Subject: [PATCH] Fix static build of bpftool
-
-Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
----
- tools/bpf/bpftool/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
-index 5442073a2e42..519ea5cb8ab1 100644
---- a/tools/bpf/bpftool/Makefile
-+++ b/tools/bpf/bpftool/Makefile
-@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
- endif
- endif
- 
--LIBS = $(LIBBPF) -lelf -lz -lcrypto
--LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
-+LIBS = $(LIBBPF) -lelf -lcrypto -lz
-+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lcrypto -lz
- 
- ifeq ($(feature-libelf-zstd),1)
- LIBS += -lzstd
--- 
-2.52.0
-
diff --git a/ci/diffs/20260202-selftests-sched_ext-Fix-init_enable_count-flakiness.patch b/ci/diffs/20260202-selftests-sched_ext-Fix-init_enable_count-flakiness.patch
deleted file mode 100644
index b2d11b70..00000000
--- a/ci/diffs/20260202-selftests-sched_ext-Fix-init_enable_count-flakiness.patch
+++ /dev/null
@@ -1,105 +0,0 @@
-From 4544e9c4ec9a5955a37fdd8204a3d98106f97ab7 Mon Sep 17 00:00:00 2001
-From: Tejun Heo <tj@kernel.org>
-Date: Mon, 2 Feb 2026 09:40:22 -1000
-Subject: [PATCH] selftests/sched_ext: Fix init_enable_count flakiness
-
-The init_enable_count test is flaky. The test forks 1024 children before
-attaching the scheduler to verify that existing tasks get ops.init_task()
-called. The children were using sleep(1) before exiting.
-
-7900aa699c34 ("sched_ext: Fix cgroup exit ordering by moving sched_ext_free()
-to finish_task_switch()") changed when tasks are removed from scx_tasks -
-previously when the task_struct was freed, now immediately in
-finish_task_switch() when the task dies.
-
-Before the commit, pre-forked children would linger on scx_tasks until freed
-regardless of when they exited, so the scheduler would always see them during
-iteration. The sleep(1) was unnecessary. After the commit, children are
-removed as soon as they die. The sleep(1) masks the problem in most cases but
-the test becomes flaky depending on timing.
-
-Fix by synchronizing properly using a pipe. All children block on read() and
-the parent signals them to exit by closing the write end after attaching the
-scheduler. The children are auto-reaped so there's no need to wait on them.
-
-Reported-by: Ihor Solodrai <ihor.solodrai@linux.dev>
-Cc: David Vernet <void@manifault.com>
-Cc: Andrea Righi <arighi@nvidia.com>
-Cc: Changwoo Min <changwoo@igalia.com>
-Cc: Emil Tsalapatis <emil@etsalapatis.com>
-Signed-off-by: Tejun Heo <tj@kernel.org>
----
- .../selftests/sched_ext/init_enable_count.c   | 34 +++++++++++++------
- 1 file changed, 23 insertions(+), 11 deletions(-)
-
-diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
-index eddf9e0e26e7..82c71653977b 100644
---- a/tools/testing/selftests/sched_ext/init_enable_count.c
-+++ b/tools/testing/selftests/sched_ext/init_enable_count.c
-@@ -4,6 +4,7 @@
-  * Copyright (c) 2023 David Vernet <dvernet@meta.com>
-  * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
-  */
-+#include <signal.h>
- #include <stdio.h>
- #include <unistd.h>
- #include <sched.h>
-@@ -23,6 +24,9 @@ static enum scx_test_status run_test(bool global)
- 	int ret, i, status;
- 	struct sched_param param = {};
- 	pid_t pids[num_pre_forks];
-+	int pipe_fds[2];
-+
-+	SCX_FAIL_IF(pipe(pipe_fds) < 0, "Failed to create pipe");
- 
- 	skel = init_enable_count__open();
- 	SCX_FAIL_IF(!skel, "Failed to open");
-@@ -38,26 +42,34 @@ static enum scx_test_status run_test(bool global)
- 	 * ensure (at least in practical terms) that there are more tasks that
- 	 * transition from SCHED_OTHER -> SCHED_EXT than there are tasks that
- 	 * take the fork() path either below or in other processes.
-+	 *
-+	 * All children will block on read() on the pipe until the parent closes
-+	 * the write end after attaching the scheduler, which signals all of
-+	 * them to exit simultaneously. Auto-reap so we don't have to wait on
-+	 * them.
- 	 */
-+	signal(SIGCHLD, SIG_IGN);
- 	for (i = 0; i < num_pre_forks; i++) {
--		pids[i] = fork();
--		SCX_FAIL_IF(pids[i] < 0, "Failed to fork child");
--		if (pids[i] == 0) {
--			sleep(1);
-+		pid_t pid = fork();
-+
-+		SCX_FAIL_IF(pid < 0, "Failed to fork child");
-+		if (pid == 0) {
-+			char buf;
-+
-+			close(pipe_fds[1]);
-+			read(pipe_fds[0], &buf, 1);
-+			close(pipe_fds[0]);
- 			exit(0);
- 		}
- 	}
-+	close(pipe_fds[0]);
- 
- 	link = bpf_map__attach_struct_ops(skel->maps.init_enable_count_ops);
- 	SCX_FAIL_IF(!link, "Failed to attach struct_ops");
- 
--	for (i = 0; i < num_pre_forks; i++) {
--		SCX_FAIL_IF(waitpid(pids[i], &status, 0) != pids[i],
--			    "Failed to wait for pre-forked child\n");
--
--		SCX_FAIL_IF(status != 0, "Pre-forked child %d exited with status %d\n", i,
--			    status);
--	}
-+	/* Signal all pre-forked children to exit. */
-+	close(pipe_fds[1]);
-+	signal(SIGCHLD, SIG_DFL);
- 
- 	bpf_link__destroy(link);
- 	SCX_GE(skel->bss->init_task_cnt, num_pre_forks);
--- 
-2.52.0
-
diff --git a/ci/diffs/202602021-sched-mmcid-Prevent-live-lock-on-task-to-CPU-mo.patch b/ci/diffs/202602021-sched-mmcid-Prevent-live-lock-on-task-to-CPU-mo.patch
deleted file mode 100644
index bcffb252..00000000
--- a/ci/diffs/202602021-sched-mmcid-Prevent-live-lock-on-task-to-CPU-mo.patch
+++ /dev/null
@@ -1,300 +0,0 @@
-From d6edf106f21e8be9edd79d5db40a14c78b307bf8 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@kernel.org>
-Date: Mon, 2 Feb 2026 10:39:40 +0100
-Subject: [PATCH 202602021/202602024] sched/mmcid: Prevent live lock on task to
- CPU mode transition
-
-Ihor reported a BPF CI failure which turned out to be a live lock in the
-MM_CID management. The scenario is:
-
-A test program creates the 5th thread, which means the MM_CID users become
-more than the number of CPUs (four in this example), so it switches to per
-CPU ownership mode.
-
-At this point each live task of the program has a CID associated. Assume
-thread creation order assignment for simplicity.
-
-   T0     CID0  runs fork() and creates T4
-   T1 	  CID1
-   T2 	  CID2
-   T3 	  CID3
-   T4       ---   not visible yet
-
-T0 sets mm_cid::percpu = true and transfers its own CID to CPU0 where it
-runs on and then starts the fixup which walks through the threads to
-transfer the per task CIDs either to the CPU the task is running on or drop
-it back into the pool if the task is not on a CPU.
-
-During that T1 - T3 are free to schedule in and out before the fixup caught
-up with them. Going through all possible permutations with a python script
-revealed a few problematic cases. The most trivial one is:
-
-   T1 schedules in on CPU1 and observes percpu == true, so it transfers
-      its CID to CPU1
-
-   T1 is migrated to CPU2 and schedule in observes percpu == true, but
-      CPU2 does not have a CID associated and T1 transferred its own to
-      CPU1
-
-      So it has to allocate one with CPU2 runqueue lock held, but the
-      pool is empty, so it keeps looping in mm_get_cid().
-
-Now T0 reaches T1 in the thread walk and tries to lock the corresponding
-runqueue lock, which is held causing a full live lock.
-
-There is a similar scenario in the reverse direction of switching from per
-CPU to task mode which is way more obvious and got therefore addressed by
-an intermediate mode. In this mode the CIDs are marked with MM_CID_TRANSIT,
-which means that they are neither owned by the CPU nor by the task. When a
-task schedules out with a transit CID it drops the CID back into the pool
-making it available for others to use temporarily. Once the task which
-initiated the mode switch finished the fixup it clears the transit mode and
-the process goes back into per task ownership mode.
-
-Unfortunately this insight was not mapped back to the task to CPU mode
-switch as the above described scenario was not considered in the analysis.
-
-Apply the same transit mechanism to the task to CPU mode switch to handle
-these problematic cases correctly.
-
-As with the CPU to task transition this results in a potential temporary
-contention on the CID bitmap, but that's only for the time it takes to
-complete the transition. After that it stays in steady mode which does not
-touch the bitmap at all.
-
-Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
-Reported-by: Ihor Solodrai <ihor.solodrai@linux.dev>
-Signed-off-by: Thomas Gleixner <tglx@kernel.org>
-Closes: https://lore.kernel.org/2b7463d7-0f58-4e34-9775-6e2115cfb971@linux.dev
-Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
----
- kernel/sched/core.c  | 128 ++++++++++++++++++++++++++++---------------
- kernel/sched/sched.h |   4 ++
- 2 files changed, 88 insertions(+), 44 deletions(-)
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 60afadb6eede..f78966867d7b 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -10265,7 +10265,8 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
-  * Serialization rules:
-  *
-  * mm::mm_cid::mutex:	Serializes fork() and exit() and therefore
-- *			protects mm::mm_cid::users.
-+ *			protects mm::mm_cid::users and mode switch
-+ *			transitions
-  *
-  * mm::mm_cid::lock:	Serializes mm_update_max_cids() and
-  *			mm_update_cpus_allowed(). Nests in mm_cid::mutex
-@@ -10281,14 +10282,61 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
-  *
-  * A CID is either owned by a task (stored in task_struct::mm_cid.cid) or
-  * by a CPU (stored in mm::mm_cid.pcpu::cid). CIDs owned by CPUs have the
-- * MM_CID_ONCPU bit set. During transition from CPU to task ownership mode,
-- * MM_CID_TRANSIT is set on the per task CIDs. When this bit is set the
-- * task needs to drop the CID into the pool when scheduling out.  Both bits
-- * (ONCPU and TRANSIT) are filtered out by task_cid() when the CID is
-- * actually handed over to user space in the RSEQ memory.
-+ * MM_CID_ONCPU bit set.
-+ *
-+ * During the transition of ownership mode, the MM_CID_TRANSIT bit is set
-+ * on the CIDs. When this bit is set the tasks drop the CID back into the
-+ * pool when scheduling out.
-+ *
-+ * Both bits (ONCPU and TRANSIT) are filtered out by task_cid() when the
-+ * CID is actually handed over to user space in the RSEQ memory.
-  *
-  * Mode switching:
-  *
-+ * All transitions of ownership mode happen in two phases:
-+ *
-+ *  1) mm:mm_cid.transit contains MM_CID_TRANSIT. This is OR'ed on the CIDs
-+ *     and denotes that the CID is only temporarily owned by a task. When
-+ *     the task schedules out it drops the CID back into the pool if this
-+ *     bit is set.
-+ *
-+ *  2) The initiating context walks the per CPU space or the tasks to fixup
-+ *     or drop the CIDs and after completion it clears mm:mm_cid.transit.
-+ *     After that point the CIDs are strictly task or CPU owned again.
-+ *
-+ * This two phase transition is required to prevent CID space exhaustion
-+ * during the transition as a direct transfer of ownership would fail:
-+ *
-+ *   - On task to CPU mode switch if a task is scheduled in on one CPU and
-+ *     then migrated to another CPU before the fixup freed enough per task
-+ *     CIDs.
-+ *
-+ *   - On CPU to task mode switch if two tasks are scheduled in on the same
-+ *     CPU before the fixup freed per CPU CIDs.
-+ *
-+ *   Both scenarios can result in a live lock because sched_in() is invoked
-+ *   with runqueue lock held and loops in search of a CID and the fixup
-+ *   thread can't make progress freeing them up because it is stuck on the
-+ *   same runqueue lock.
-+ *
-+ * While MM_CID_TRANSIT is active during the transition phase the MM_CID
-+ * bitmap can be contended, but that's a temporary contention bound to the
-+ * transition period. After that everything goes back into steady state and
-+ * nothing except fork() and exit() will touch the bitmap. This is an
-+ * acceptable tradeoff as it completely avoids complex serialization,
-+ * memory barriers and atomic operations for the common case.
-+ *
-+ * Aside of that this mechanism also ensures RT compability:
-+ *
-+ *   - The task which runs the fixup is fully preemptible except for the
-+ *     short runqueue lock held sections.
-+ *
-+ *   - The transient impact of the bitmap contention is only problematic
-+ *     when there is a thundering herd scenario of tasks scheduling in and
-+ *     out concurrently. There is not much which can be done about that
-+ *     except for avoiding mode switching by a proper overall system
-+ *     configuration.
-+ *
-  * Switching to per CPU mode happens when the user count becomes greater
-  * than the maximum number of CIDs, which is calculated by:
-  *
-@@ -10302,12 +10350,13 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
-  *
-  * At the point of switching to per CPU mode the new user is not yet
-  * visible in the system, so the task which initiated the fork() runs the
-- * fixup function: mm_cid_fixup_tasks_to_cpu() walks the thread list and
-- * either transfers each tasks owned CID to the CPU the task runs on or
-- * drops it into the CID pool if a task is not on a CPU at that point in
-- * time. Tasks which schedule in before the task walk reaches them do the
-- * handover in mm_cid_schedin(). When mm_cid_fixup_tasks_to_cpus() completes
-- * it's guaranteed that no task related to that MM owns a CID anymore.
-+ * fixup function. mm_cid_fixup_tasks_to_cpu() walks the thread list and
-+ * either marks each task owned CID with MM_CID_TRANSIT if the task is
-+ * running on a CPU or drops it into the CID pool if a task is not on a
-+ * CPU. Tasks which schedule in before the task walk reaches them do the
-+ * handover in mm_cid_schedin(). When mm_cid_fixup_tasks_to_cpus()
-+ * completes it is guaranteed that no task related to that MM owns a CID
-+ * anymore.
-  *
-  * Switching back to task mode happens when the user count goes below the
-  * threshold which was recorded on the per CPU mode switch:
-@@ -10323,28 +10372,11 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
-  * run either in the deferred update function in context of a workqueue or
-  * by a task which forks a new one or by a task which exits. Whatever
-  * happens first. mm_cid_fixup_cpus_to_task() walks through the possible
-- * CPUs and either transfers the CPU owned CIDs to a related task which
-- * runs on the CPU or drops it into the pool. Tasks which schedule in on a
-- * CPU which the walk did not cover yet do the handover themself.
-- *
-- * This transition from CPU to per task ownership happens in two phases:
-- *
-- *  1) mm:mm_cid.transit contains MM_CID_TRANSIT This is OR'ed on the task
-- *     CID and denotes that the CID is only temporarily owned by the
-- *     task. When it schedules out the task drops the CID back into the
-- *     pool if this bit is set.
-- *
-- *  2) The initiating context walks the per CPU space and after completion
-- *     clears mm:mm_cid.transit. So after that point the CIDs are strictly
-- *     task owned again.
-- *
-- * This two phase transition is required to prevent CID space exhaustion
-- * during the transition as a direct transfer of ownership would fail if
-- * two tasks are scheduled in on the same CPU before the fixup freed per
-- * CPU CIDs.
-- *
-- * When mm_cid_fixup_cpus_to_tasks() completes it's guaranteed that no CID
-- * related to that MM is owned by a CPU anymore.
-+ * CPUs and either marks the CPU owned CIDs with MM_CID_TRANSIT if a
-+ * related task is running on the CPU or drops it into the pool. Tasks
-+ * which are scheduled in before the fixup covered them do the handover
-+ * themself. When mm_cid_fixup_cpus_to_tasks() completes it is guaranteed
-+ * that no CID related to that MM is owned by a CPU anymore.
-  */
- 
- /*
-@@ -10396,9 +10428,9 @@ static bool mm_update_max_cids(struct mm_struct *mm)
- 	/* Mode change required? */
- 	if (!!mc->percpu == !!mc->pcpu_thrs)
- 		return false;
--	/* When switching back to per TASK mode, set the transition flag */
--	if (!mc->pcpu_thrs)
--		WRITE_ONCE(mc->transit, MM_CID_TRANSIT);
-+
-+	/* Set the transition flag to bridge the transfer */
-+	WRITE_ONCE(mc->transit, MM_CID_TRANSIT);
- 	WRITE_ONCE(mc->percpu, !!mc->pcpu_thrs);
- 	return true;
- }
-@@ -10489,10 +10521,10 @@ static void mm_cid_fixup_cpus_to_tasks(struct mm_struct *mm)
- 	WRITE_ONCE(mm->mm_cid.transit, 0);
- }
- 
--static inline void mm_cid_transfer_to_cpu(struct task_struct *t, struct mm_cid_pcpu *pcp)
-+static inline void mm_cid_transit_to_cpu(struct task_struct *t, struct mm_cid_pcpu *pcp)
- {
- 	if (cid_on_task(t->mm_cid.cid)) {
--		t->mm_cid.cid = cid_to_cpu_cid(t->mm_cid.cid);
-+		t->mm_cid.cid = cid_to_transit_cid(t->mm_cid.cid);
- 		pcp->cid = t->mm_cid.cid;
- 	}
- }
-@@ -10505,18 +10537,17 @@ static bool mm_cid_fixup_task_to_cpu(struct task_struct *t, struct mm_struct *mm
- 	if (!t->mm_cid.active)
- 		return false;
- 	if (cid_on_task(t->mm_cid.cid)) {
--		/* If running on the CPU, transfer the CID, otherwise drop it */
-+		/* If running on the CPU, put the CID in transit mode, otherwise drop it */
- 		if (task_rq(t)->curr == t)
--			mm_cid_transfer_to_cpu(t, per_cpu_ptr(mm->mm_cid.pcpu, task_cpu(t)));
-+			mm_cid_transit_to_cpu(t, per_cpu_ptr(mm->mm_cid.pcpu, task_cpu(t)));
- 		else
- 			mm_unset_cid_on_task(t);
- 	}
- 	return true;
- }
- 
--static void mm_cid_fixup_tasks_to_cpus(void)
-+static void mm_cid_do_fixup_tasks_to_cpus(struct mm_struct *mm)
- {
--	struct mm_struct *mm = current->mm;
- 	struct task_struct *p, *t;
- 	unsigned int users;
- 
-@@ -10554,6 +10585,15 @@ static void mm_cid_fixup_tasks_to_cpus(void)
- 	}
- }
- 
-+static void mm_cid_fixup_tasks_to_cpus(void)
-+{
-+	struct mm_struct *mm = current->mm;
-+
-+	mm_cid_do_fixup_tasks_to_cpus(mm);
-+	/* Clear the transition bit */
-+	WRITE_ONCE(mm->mm_cid.transit, 0);
-+}
-+
- static bool sched_mm_cid_add_user(struct task_struct *t, struct mm_struct *mm)
- {
- 	t->mm_cid.active = 1;
-@@ -10592,7 +10632,7 @@ void sched_mm_cid_fork(struct task_struct *t)
- 		if (!percpu)
- 			mm_cid_transit_to_task(current, pcp);
- 		else
--			mm_cid_transfer_to_cpu(current, pcp);
-+			mm_cid_transit_to_cpu(current, pcp);
- 	}
- 
- 	if (percpu) {
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index d30cca6870f5..96f613d7d181 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -3818,6 +3818,10 @@ static __always_inline void mm_cid_from_cpu(struct task_struct *t, unsigned int
- 		/* Still nothing, allocate a new one */
- 		if (!cid_on_cpu(cpu_cid))
- 			cpu_cid = cid_to_cpu_cid(mm_get_cid(mm));
-+
-+		/* Set the transition mode flag if required */
-+		if (READ_ONCE(mm->mm_cid.transit))
-+			cpu_cid = cpu_cid_to_cid(cpu_cid) | MM_CID_TRANSIT;
- 	}
- 	mm_cid_update_pcpu_cid(mm, cpu_cid);
- 	mm_cid_update_task_cid(t, cpu_cid);
--- 
-2.52.0
-
diff --git a/ci/diffs/202602022-sched-mmcid-Protect-transition-on-weakly-ordere.patch b/ci/diffs/202602022-sched-mmcid-Protect-transition-on-weakly-ordere.patch
deleted file mode 100644
index 364c1528..00000000
--- a/ci/diffs/202602022-sched-mmcid-Protect-transition-on-weakly-ordere.patch
+++ /dev/null
@@ -1,355 +0,0 @@
-From 683cdd5667e8fc973ab52933deeb270a036bfa7a Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@kernel.org>
-Date: Mon, 2 Feb 2026 10:39:45 +0100
-Subject: [PATCH 202602022/202602024] sched/mmcid: Protect transition on weakly
- ordered systems
-
-Shrikanth reported a hard lockup which he observed once. The stack trace
-shows the following CID related participants:
-
-  watchdog: CPU 23 self-detected hard LOCKUP @ mm_get_cid+0xe8/0x188
-  NIP: mm_get_cid+0xe8/0x188
-  LR:  mm_get_cid+0x108/0x188
-   mm_cid_switch_to+0x3c4/0x52c
-   __schedule+0x47c/0x700
-   schedule_idle+0x3c/0x64
-   do_idle+0x160/0x1b0
-   cpu_startup_entry+0x48/0x50
-   start_secondary+0x284/0x288
-   start_secondary_prolog+0x10/0x14
-
-  watchdog: CPU 11 self-detected hard LOCKUP @ plpar_hcall_norets_notrace+0x18/0x2c
-  NIP: plpar_hcall_norets_notrace+0x18/0x2c
-  LR:  queued_spin_lock_slowpath+0xd88/0x15d0
-   _raw_spin_lock+0x80/0xa0
-   raw_spin_rq_lock_nested+0x3c/0xf8
-   mm_cid_fixup_cpus_to_tasks+0xc8/0x28c
-   sched_mm_cid_exit+0x108/0x22c
-   do_exit+0xf4/0x5d0
-   make_task_dead+0x0/0x178
-   system_call_exception+0x128/0x390
-   system_call_vectored_common+0x15c/0x2ec
-
-The task on CPU11 is running the CID ownership mode change fixup function
-and is stuck on a runqueue lock. The task on CPU23 is trying to get a CID
-from the pool with the same runqueue lock held, but the pool is empty.
-
-After decoding a similar issue in the opposite direction switching from per
-task to per CPU mode the tool which models the possible scenarios failed to
-come up with a similar loop hole.
-
-This showed up only once, was not reproducible and according to tooling not
-related to a overlooked scheduling scenario permutation. But the fact that
-it was observed on a PowerPC system gave the right hint: PowerPC is a
-weakly ordered architecture.
-
-The transition mechanism does:
-
-    WRITE_ONCE(mm->mm_cid.transit, MM_CID_TRANSIT);
-    WRITE_ONCE(mm->mm_cid.percpu, new_mode);
-
-    fixup()
-
-    WRITE_ONCE(mm->mm_cid.transit, 0);
-
-mm_cid_schedin() does:
-
-    if (!READ_ONCE(mm->mm_cid.percpu))
-       ...
-       cid |= READ_ONCE(mm->mm_cid.transit);
-
-so weakly ordered systems can observe percpu == false and transit == 0 even
-if the fixup function has not yet completed. As a consequence the task will
-not drop the CID when scheduling out before the fixup is completed, which
-means the CID space can be exhausted and the next task scheduling in will
-loop in mm_get_cid() and the fixup thread can livelock on the held runqueue
-lock as above.
-
-This could obviously be solved by using:
-     smp_store_release(&mm->mm_cid.percpu, true);
-and
-     smp_load_acquire(&mm->mm_cid.percpu);
-
-but that brings a memory barrier back into the scheduler hotpath, which was
-just designed out by the CID rewrite.
-
-That can be completely avoided by combining the per CPU mode and the
-transit storage into a single mm_cid::mode member and ordering the stores
-against the fixup functions to prevent the CPU from reordering them.
-
-That makes the update of both states atomic and a concurrent read observes
-always consistent state.
-
-The price is an additional AND operation in mm_cid_schedin() to evaluate
-the per CPU or the per task path, but that's in the noise even on strongly
-ordered architectures as the actual load can be significantly more
-expensive and the conditional branch evaluation is there anyway.
-
-Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
-Reported-by: Shrikanth Hegde <sshegde@linux.ibm.com>
-Signed-off-by: Thomas Gleixner <tglx@kernel.org>
-Closes: https://lore.kernel.org/bdfea828-4585-40e8-8835-247c6a8a76b0@linux.ibm.com
-Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
----
- include/linux/rseq_types.h |  6 ++--
- kernel/sched/core.c        | 66 +++++++++++++++++++++++++-------------
- kernel/sched/sched.h       | 21 ++++++------
- 3 files changed, 58 insertions(+), 35 deletions(-)
-
-diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
-index 332dc14b81c9..ef0811379c54 100644
---- a/include/linux/rseq_types.h
-+++ b/include/linux/rseq_types.h
-@@ -121,8 +121,7 @@ struct mm_cid_pcpu {
- /**
-  * struct mm_mm_cid - Storage for per MM CID data
-  * @pcpu:		Per CPU storage for CIDs associated to a CPU
-- * @percpu:		Set, when CIDs are in per CPU mode
-- * @transit:		Set to MM_CID_TRANSIT during a mode change transition phase
-+ * @mode:		Indicates per CPU and transition mode
-  * @max_cids:		The exclusive maximum CID value for allocation and convergence
-  * @irq_work:		irq_work to handle the affinity mode change case
-  * @work:		Regular work to handle the affinity mode change case
-@@ -139,8 +138,7 @@ struct mm_cid_pcpu {
- struct mm_mm_cid {
- 	/* Hotpath read mostly members */
- 	struct mm_cid_pcpu	__percpu *pcpu;
--	unsigned int		percpu;
--	unsigned int		transit;
-+	unsigned int		mode;
- 	unsigned int		max_cids;
- 
- 	/* Rarely used. Moves @lock and @mutex into the second cacheline */
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index f78966867d7b..f429ff4eb84c 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -10293,16 +10293,25 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
-  *
-  * Mode switching:
-  *
-+ * The ownership mode is per process and stored in mm:mm_cid::mode with the
-+ * following possible states:
-+ *
-+ *	0:				Per task ownership
-+ *	0 | MM_CID_TRANSIT:		Transition from per CPU to per task
-+ *	MM_CID_ONCPU:			Per CPU ownership
-+ *	MM_CID_ONCPU | MM_CID_TRANSIT:	Transition from per task to per CPU
-+ *
-  * All transitions of ownership mode happen in two phases:
-  *
-- *  1) mm:mm_cid.transit contains MM_CID_TRANSIT. This is OR'ed on the CIDs
-- *     and denotes that the CID is only temporarily owned by a task. When
-- *     the task schedules out it drops the CID back into the pool if this
-- *     bit is set.
-+ *  1) mm:mm_cid::mode has the MM_CID_TRANSIT bit set. This is OR'ed on the
-+ *     CIDs and denotes that the CID is only temporarily owned by a
-+ *     task. When the task schedules out it drops the CID back into the
-+ *     pool if this bit is set.
-  *
-  *  2) The initiating context walks the per CPU space or the tasks to fixup
-- *     or drop the CIDs and after completion it clears mm:mm_cid.transit.
-- *     After that point the CIDs are strictly task or CPU owned again.
-+ *     or drop the CIDs and after completion it clears MM_CID_TRANSIT in
-+ *     mm:mm_cid::mode. After that point the CIDs are strictly task or CPU
-+ *     owned again.
-  *
-  * This two phase transition is required to prevent CID space exhaustion
-  * during the transition as a direct transfer of ownership would fail:
-@@ -10407,6 +10416,7 @@ static inline unsigned int mm_cid_calc_pcpu_thrs(struct mm_mm_cid *mc)
- static bool mm_update_max_cids(struct mm_struct *mm)
- {
- 	struct mm_mm_cid *mc = &mm->mm_cid;
-+	bool percpu = cid_on_cpu(mc->mode);
- 
- 	lockdep_assert_held(&mm->mm_cid.lock);
- 
-@@ -10415,7 +10425,7 @@ static bool mm_update_max_cids(struct mm_struct *mm)
- 	__mm_update_max_cids(mc);
- 
- 	/* Check whether owner mode must be changed */
--	if (!mc->percpu) {
-+	if (!percpu) {
- 		/* Enable per CPU mode when the number of users is above max_cids */
- 		if (mc->users > mc->max_cids)
- 			mc->pcpu_thrs = mm_cid_calc_pcpu_thrs(mc);
-@@ -10426,12 +10436,17 @@ static bool mm_update_max_cids(struct mm_struct *mm)
- 	}
- 
- 	/* Mode change required? */
--	if (!!mc->percpu == !!mc->pcpu_thrs)
-+	if (percpu == !!mc->pcpu_thrs)
- 		return false;
- 
--	/* Set the transition flag to bridge the transfer */
--	WRITE_ONCE(mc->transit, MM_CID_TRANSIT);
--	WRITE_ONCE(mc->percpu, !!mc->pcpu_thrs);
-+	/* Flip the mode and set the transition flag to bridge the transfer */
-+	WRITE_ONCE(mc->mode, mc->mode ^ (MM_CID_TRANSIT | MM_CID_ONCPU));
-+	/*
-+	 * Order the store against the subsequent fixups so that
-+	 * acquire(rq::lock) cannot be reordered by the CPU before the
-+	 * store.
-+	 */
-+	smp_mb();
- 	return true;
- }
- 
-@@ -10456,7 +10471,7 @@ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpu
- 
- 	WRITE_ONCE(mc->nr_cpus_allowed, weight);
- 	__mm_update_max_cids(mc);
--	if (!mc->percpu)
-+	if (!cid_on_cpu(mc->mode))
- 		return;
- 
- 	/* Adjust the threshold to the wider set */
-@@ -10474,6 +10489,16 @@ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpu
- 	irq_work_queue(&mc->irq_work);
- }
- 
-+static inline void mm_cid_complete_transit(struct mm_struct *mm, unsigned int mode)
-+{
-+	/*
-+	 * Ensure that the store removing the TRANSIT bit cannot be
-+	 * reordered by the CPU before the fixups have been completed.
-+	 */
-+	smp_mb();
-+	WRITE_ONCE(mm->mm_cid.mode, mode);
-+}
-+
- static inline void mm_cid_transit_to_task(struct task_struct *t, struct mm_cid_pcpu *pcp)
- {
- 	if (cid_on_cpu(t->mm_cid.cid)) {
-@@ -10517,8 +10542,7 @@ static void mm_cid_fixup_cpus_to_tasks(struct mm_struct *mm)
- 			}
- 		}
- 	}
--	/* Clear the transition bit */
--	WRITE_ONCE(mm->mm_cid.transit, 0);
-+	mm_cid_complete_transit(mm, 0);
- }
- 
- static inline void mm_cid_transit_to_cpu(struct task_struct *t, struct mm_cid_pcpu *pcp)
-@@ -10590,8 +10614,7 @@ static void mm_cid_fixup_tasks_to_cpus(void)
- 	struct mm_struct *mm = current->mm;
- 
- 	mm_cid_do_fixup_tasks_to_cpus(mm);
--	/* Clear the transition bit */
--	WRITE_ONCE(mm->mm_cid.transit, 0);
-+	mm_cid_complete_transit(mm, MM_CID_ONCPU);
- }
- 
- static bool sched_mm_cid_add_user(struct task_struct *t, struct mm_struct *mm)
-@@ -10622,13 +10645,13 @@ void sched_mm_cid_fork(struct task_struct *t)
- 		}
- 
- 		if (!sched_mm_cid_add_user(t, mm)) {
--			if (!mm->mm_cid.percpu)
-+			if (!cid_on_cpu(mm->mm_cid.mode))
- 				t->mm_cid.cid = mm_get_cid(mm);
- 			return;
- 		}
- 
- 		/* Handle the mode change and transfer current's CID */
--		percpu = !!mm->mm_cid.percpu;
-+		percpu = cid_on_cpu(mm->mm_cid.mode);
- 		if (!percpu)
- 			mm_cid_transit_to_task(current, pcp);
- 		else
-@@ -10667,7 +10690,7 @@ static bool __sched_mm_cid_exit(struct task_struct *t)
- 	 * affinity change increased the number of allowed CPUs and the
- 	 * deferred fixup did not run yet.
- 	 */
--	if (WARN_ON_ONCE(mm->mm_cid.percpu))
-+	if (WARN_ON_ONCE(cid_on_cpu(mm->mm_cid.mode)))
- 		return false;
- 	/*
- 	 * A failed fork(2) cleanup never gets here, so @current must have
-@@ -10758,7 +10781,7 @@ static void mm_cid_work_fn(struct work_struct *work)
- 		if (!mm_update_max_cids(mm))
- 			return;
- 		/* Affinity changes can only switch back to task mode */
--		if (WARN_ON_ONCE(mm->mm_cid.percpu))
-+		if (WARN_ON_ONCE(cid_on_cpu(mm->mm_cid.mode)))
- 			return;
- 	}
- 	mm_cid_fixup_cpus_to_tasks(mm);
-@@ -10779,8 +10802,7 @@ static void mm_cid_irq_work(struct irq_work *work)
- void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
- {
- 	mm->mm_cid.max_cids = 0;
--	mm->mm_cid.percpu = 0;
--	mm->mm_cid.transit = 0;
-+	mm->mm_cid.mode = 0;
- 	mm->mm_cid.nr_cpus_allowed = p->nr_cpus_allowed;
- 	mm->mm_cid.users = 0;
- 	mm->mm_cid.pcpu_thrs = 0;
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index 96f613d7d181..ecf2281e3545 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -3793,7 +3793,8 @@ static __always_inline void mm_cid_update_pcpu_cid(struct mm_struct *mm, unsigne
- 	__this_cpu_write(mm->mm_cid.pcpu->cid, cid);
- }
- 
--static __always_inline void mm_cid_from_cpu(struct task_struct *t, unsigned int cpu_cid)
-+static __always_inline void mm_cid_from_cpu(struct task_struct *t, unsigned int cpu_cid,
-+					    unsigned int mode)
- {
- 	unsigned int max_cids, tcid = t->mm_cid.cid;
- 	struct mm_struct *mm = t->mm;
-@@ -3819,15 +3820,16 @@ static __always_inline void mm_cid_from_cpu(struct task_struct *t, unsigned int
- 		if (!cid_on_cpu(cpu_cid))
- 			cpu_cid = cid_to_cpu_cid(mm_get_cid(mm));
- 
--		/* Set the transition mode flag if required */
--		if (READ_ONCE(mm->mm_cid.transit))
-+		/* Handle the transition mode flag if required */
-+		if (mode & MM_CID_TRANSIT)
- 			cpu_cid = cpu_cid_to_cid(cpu_cid) | MM_CID_TRANSIT;
- 	}
- 	mm_cid_update_pcpu_cid(mm, cpu_cid);
- 	mm_cid_update_task_cid(t, cpu_cid);
- }
- 
--static __always_inline void mm_cid_from_task(struct task_struct *t, unsigned int cpu_cid)
-+static __always_inline void mm_cid_from_task(struct task_struct *t, unsigned int cpu_cid,
-+					     unsigned int mode)
- {
- 	unsigned int max_cids, tcid = t->mm_cid.cid;
- 	struct mm_struct *mm = t->mm;
-@@ -3853,7 +3855,7 @@ static __always_inline void mm_cid_from_task(struct task_struct *t, unsigned int
- 		if (!cid_on_task(tcid))
- 			tcid = mm_get_cid(mm);
- 		/* Set the transition mode flag if required */
--		tcid |= READ_ONCE(mm->mm_cid.transit);
-+		tcid |= mode & MM_CID_TRANSIT;
- 	}
- 	mm_cid_update_pcpu_cid(mm, tcid);
- 	mm_cid_update_task_cid(t, tcid);
-@@ -3862,16 +3864,17 @@ static __always_inline void mm_cid_from_task(struct task_struct *t, unsigned int
- static __always_inline void mm_cid_schedin(struct task_struct *next)
- {
- 	struct mm_struct *mm = next->mm;
--	unsigned int cpu_cid;
-+	unsigned int cpu_cid, mode;
- 
- 	if (!next->mm_cid.active)
- 		return;
- 
- 	cpu_cid = __this_cpu_read(mm->mm_cid.pcpu->cid);
--	if (likely(!READ_ONCE(mm->mm_cid.percpu)))
--		mm_cid_from_task(next, cpu_cid);
-+	mode = READ_ONCE(mm->mm_cid.mode);
-+	if (likely(!cid_on_cpu(mode)))
-+		mm_cid_from_task(next, cpu_cid, mode);
- 	else
--		mm_cid_from_cpu(next, cpu_cid);
-+		mm_cid_from_cpu(next, cpu_cid, mode);
- }
- 
- static __always_inline void mm_cid_schedout(struct task_struct *prev)
--- 
-2.52.0
-
diff --git a/ci/diffs/202602023-sched-mmcid-Drop-per-CPU-CID-immediately-when-s.patch b/ci/diffs/202602023-sched-mmcid-Drop-per-CPU-CID-immediately-when-s.patch
deleted file mode 100644
index 4699a7c0..00000000
--- a/ci/diffs/202602023-sched-mmcid-Drop-per-CPU-CID-immediately-when-s.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From 3f2e95ba4a663b7d57b662d69a689b73be1db25d Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@kernel.org>
-Date: Mon, 2 Feb 2026 10:39:50 +0100
-Subject: [PATCH 202602023/202602024] sched/mmcid: Drop per CPU CID immediately
- when switching to per task mode
-
-When a exiting task initiates the switch from per CPU back to per task
-mode, it has already dropped its CID and marked itself inactive. But a
-leftover from an earlier iteration of the rework then reassigns the per
-CPU CID to the exiting task with the transition bit set.
-
-That's wrong as the task is already marked CID inactive, which means it is
-inconsistent state. It's harmless because the CID is marked in transit and
-therefore dropped back into the pool when the exiting task schedules out
-either through preemption or the final schedule().
-
-Simply drop the per CPU CID when the exiting task triggered the transition.
-
-Fixes: fbd0e71dc370 ("sched/mmcid: Provide CID ownership mode fixup functions")
-Signed-off-by: Thomas Gleixner <tglx@kernel.org>
-Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
----
- kernel/sched/core.c | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index f429ff4eb84c..93421141da5b 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -10723,8 +10723,14 @@ void sched_mm_cid_exit(struct task_struct *t)
- 			scoped_guard(raw_spinlock_irq, &mm->mm_cid.lock) {
- 				if (!__sched_mm_cid_exit(t))
- 					return;
--				/* Mode change required. Transfer currents CID */
--				mm_cid_transit_to_task(current, this_cpu_ptr(mm->mm_cid.pcpu));
-+				/*
-+				 * Mode change. The task has the CID unset
-+				 * already. The CPU CID is still valid and
-+				 * does not have MM_CID_TRANSIT set as the
-+				 * mode change has just taken effect under
-+				 * mm::mm_cid::lock. Drop it.
-+				 */
-+				mm_drop_cid_on_cpu(mm, this_cpu_ptr(mm->mm_cid.pcpu));
- 			}
- 			mm_cid_fixup_cpus_to_tasks(mm);
- 			return;
--- 
-2.52.0
-
diff --git a/ci/diffs/202602024-sched-mmcid-Optimize-transitional-CIDs-when-sch.patch b/ci/diffs/202602024-sched-mmcid-Optimize-transitional-CIDs-when-sch.patch
deleted file mode 100644
index f0f3ff2a..00000000
--- a/ci/diffs/202602024-sched-mmcid-Optimize-transitional-CIDs-when-sch.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-From 0753080bd26e8209d870106ac3bd1d80454c1399 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@kernel.org>
-Date: Mon, 2 Feb 2026 10:39:55 +0100
-Subject: [PATCH 202602024/202602024] sched/mmcid: Optimize transitional CIDs
- when scheduling out
-
-During the investigation of the various transition mode issues
-instrumentation revealed that the amount of bitmap operations can be
-significantly reduced when a task with a transitional CID schedules out
-after the fixup function completed and disabled the transition mode.
-
-At that point the mode is stable and therefore it is not required to drop
-the transitional CID back into the pool. As the fixup is complete the
-potential exhaustion of the CID pool is not longer possible, so the CID can
-be transferred to the scheduling out task or to the CPU depending on the
-current ownership mode.
-
-The racy snapshot of mm_cid::mode which contains both the ownership state
-and the transition bit is valid because runqueue lock is held and the fixup
-function of a concurrent mode switch is serialized.
-
-Assigning the ownership right there not only spares the bitmap access for
-dropping the CID it also avoids it when the task is scheduled back in as it
-directly hits the fast path in both modes when the CID is within the
-optimal range. If it's outside the range the next schedule in will need to
-converge so dropping it right away is sensible. In the good case this also
-allows to go into the fast path on the next schedule in operation.
-
-With a thread pool benchmark which is configured to cross the mode switch
-boundaries frequently this reduces the number of bitmap operations by about
-30% and increases the fastpath utilization in the low single digit
-percentage range.
-
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
----
- kernel/sched/sched.h | 23 +++++++++++++++++++++--
- 1 file changed, 21 insertions(+), 2 deletions(-)
-
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index ecf2281e3545..70b595dbf227 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -3879,12 +3879,31 @@ static __always_inline void mm_cid_schedin(struct task_struct *next)
- 
- static __always_inline void mm_cid_schedout(struct task_struct *prev)
- {
-+	struct mm_struct *mm = prev->mm;
-+	unsigned int mode, cid;
-+
- 	/* During mode transitions CIDs are temporary and need to be dropped */
- 	if (likely(!cid_in_transit(prev->mm_cid.cid)))
- 		return;
- 
--	mm_drop_cid(prev->mm, cid_from_transit_cid(prev->mm_cid.cid));
--	prev->mm_cid.cid = MM_CID_UNSET;
-+	mode = READ_ONCE(mm->mm_cid.mode);
-+	cid = cid_from_transit_cid(prev->mm_cid.cid);
-+
-+	/*
-+	 * If transition mode is done, transfer ownership when the CID is
-+	 * within the convergence range to optimize the next schedule in.
-+	 */
-+	if (!cid_in_transit(mode) && cid < READ_ONCE(mm->mm_cid.max_cids)) {
-+		if (cid_on_cpu(mode))
-+			cid = cid_to_cpu_cid(cid);
-+
-+		/* Update both so that the next schedule in goes into the fast path */
-+		mm_cid_update_pcpu_cid(mm, cid);
-+		prev->mm_cid.cid = cid;
-+	} else {
-+		mm_drop_cid(mm, cid);
-+		prev->mm_cid.cid = MM_CID_UNSET;
-+	}
- }
- 
- static inline void mm_cid_switch_to(struct task_struct *prev, struct task_struct *next)
--- 
-2.52.0
-
diff --git a/ci/diffs/20260210-selftests-bpf-Rename-do_filp_open-to-do_file_open.patch b/ci/diffs/20260210-selftests-bpf-Rename-do_filp_open-to-do_file_open.patch
deleted file mode 100644
index 23e0c12d..00000000
--- a/ci/diffs/20260210-selftests-bpf-Rename-do_filp_open-to-do_file_open.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From 11192aeefa42b4788cf0eff65a2972b7a90a5b86 Mon Sep 17 00:00:00 2001
-From: Ihor Solodrai <ihor.solodrai@linux.dev>
-Date: Tue, 10 Feb 2026 14:53:15 -0800
-Subject: [PATCH bpf v1] selftests/bpf: Rename do_filp_open() to do_file_open()
-
-do_filp_open() was renamed (fixing typo) in commit
-541003b576c3 ("rename do_filp_open() to do_file_open()")
-
-This broke test_profiler, because it uses a kretprobe on that
-function. Fix it by renaming accordingly.
-
-Fixes: 541003b576c3 ("rename do_filp_open() to do_file_open()")
-Reported-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
-Closes: https://lore.kernel.org/bpf/djwjf2vfb7gro3rfag666bojod6ytcectahnb5z6hx2hawimtj@sx47ghzjg4lw/
-Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
----
- tools/testing/selftests/bpf/progs/profiler.h     | 2 +-
- tools/testing/selftests/bpf/progs/profiler.inc.h | 6 +++---
- 2 files changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h
-index 3bac4fdd4bdf..637fbf2c2652 100644
---- a/tools/testing/selftests/bpf/progs/profiler.h
-+++ b/tools/testing/selftests/bpf/progs/profiler.h
-@@ -169,7 +169,7 @@ enum bpf_function_id {
- 	profiler_bpf_sched_process_exec,
- 	profiler_bpf_sched_process_exit,
- 	profiler_bpf_sys_enter_kill,
--	profiler_bpf_do_filp_open_ret,
-+	profiler_bpf_do_file_open_ret,
- 	profiler_bpf_sched_process_fork,
- 	profiler_bpf_vfs_link,
- 	profiler_bpf_vfs_symlink,
-diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
-index 813143b4985d..9044dd8aff11 100644
---- a/tools/testing/selftests/bpf/progs/profiler.inc.h
-+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
-@@ -751,11 +751,11 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
- 	return 0;
- }
- 
--SEC("kretprobe/do_filp_open")
--int kprobe_ret__do_filp_open(struct pt_regs* ctx)
-+SEC("kretprobe/do_file_open")
-+int kprobe_ret__do_file_open(struct pt_regs *ctx)
- {
- 	struct bpf_func_stats_ctx stats_ctx;
--	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
-+	bpf_stats_enter(&stats_ctx, profiler_bpf_do_file_open_ret);
- 
- 	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
- 
--- 
-2.53.0
-
diff --git a/ci/diffs/20260212-sched-mmcid-Don-t-assume-CID-is-CPU-owned-on-mode-sw.patch b/ci/diffs/20260212-sched-mmcid-Don-t-assume-CID-is-CPU-owned-on-mode-sw.patch
deleted file mode 100644
index 87823ba0..00000000
--- a/ci/diffs/20260212-sched-mmcid-Don-t-assume-CID-is-CPU-owned-on-mode-sw.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 1e83ccd5921a610ef409a7d4e56db27822b4ea39 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@kernel.org>
-Date: Tue, 10 Feb 2026 17:20:51 +0100
-Subject: [PATCH] sched/mmcid: Don't assume CID is CPU owned on mode switch
-
-Shinichiro reported a KASAN UAF, which is actually an out of bounds access
-in the MMCID management code.
-
-   CPU0						CPU1
-   						T1 runs in userspace
-   T0: fork(T4) -> Switch to per CPU CID mode
-         fixup() set MM_CID_TRANSIT on T1/CPU1
-   T4 exit()
-   T3 exit()
-   T2 exit()
-						T1 exit() switch to per task mode
-						 ---> Out of bounds access.
-
-As T1 has not scheduled after T0 set the TRANSIT bit, it exits with the
-TRANSIT bit set. sched_mm_cid_remove_user() clears the TRANSIT bit in
-the task and drops the CID, but it does not touch the per CPU storage.
-That's functionally correct because a CID is only owned by the CPU when
-the ONCPU bit is set, which is mutually exclusive with the TRANSIT flag.
-
-Now sched_mm_cid_exit() assumes that the CID is CPU owned because the
-prior mode was per CPU. It invokes mm_drop_cid_on_cpu() which clears the
-not set ONCPU bit and then invokes clear_bit() with an insanely large
-bit number because TRANSIT is set (bit 29).
-
-Prevent that by actually validating that the CID is CPU owned in
-mm_drop_cid_on_cpu().
-
-Fixes: 007d84287c74 ("sched/mmcid: Drop per CPU CID immediately when switching to per task mode")
-Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
-Signed-off-by: Thomas Gleixner <tglx@kernel.org>
-Tested-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
-Cc: stable@vger.kernel.org
-Closes: https://lore.kernel.org/aYsZrixn9b6s_2zL@shinmob
-Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
----
- kernel/sched/core.c  | 7 +++----
- kernel/sched/sched.h | 6 ++++--
- 2 files changed, 7 insertions(+), 6 deletions(-)
-
-diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 7c8b769c0d0d..759777694c78 100644
---- a/kernel/sched/core.c
-+++ b/kernel/sched/core.c
-@@ -10788,10 +10788,9 @@ void sched_mm_cid_exit(struct task_struct *t)
- 					return;
- 				/*
- 				 * Mode change. The task has the CID unset
--				 * already. The CPU CID is still valid and
--				 * does not have MM_CID_TRANSIT set as the
--				 * mode change has just taken effect under
--				 * mm::mm_cid::lock. Drop it.
-+				 * already and dealt with an eventually set
-+				 * TRANSIT bit. If the CID is owned by the CPU
-+				 * then drop it.
- 				 */
- 				mm_drop_cid_on_cpu(mm, this_cpu_ptr(mm->mm_cid.pcpu));
- 			}
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index e51bfa3586fa..b82fb70a9d54 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -3813,8 +3813,10 @@ static __always_inline void mm_unset_cid_on_task(struct task_struct *t)
- static __always_inline void mm_drop_cid_on_cpu(struct mm_struct *mm, struct mm_cid_pcpu *pcp)
- {
- 	/* Clear the ONCPU bit, but do not set UNSET in the per CPU storage */
--	pcp->cid = cpu_cid_to_cid(pcp->cid);
--	mm_drop_cid(mm, pcp->cid);
-+	if (cid_on_cpu(pcp->cid)) {
-+		pcp->cid = cpu_cid_to_cid(pcp->cid);
-+		mm_drop_cid(mm, pcp->cid);
-+	}
- }
- 
- static inline unsigned int __mm_get_cid(struct mm_struct *mm, unsigned int max_cids)
--- 
-2.53.0
-
diff --git a/ci/diffs/20260223-s390-bpf-Do-not-increment-tailcall-count-when-prog-i.patch b/ci/diffs/20260223-s390-bpf-Do-not-increment-tailcall-count-when-prog-i.patch
new file mode 100644
index 00000000..12f6e2d4
--- /dev/null
+++ b/ci/diffs/20260223-s390-bpf-Do-not-increment-tailcall-count-when-prog-i.patch
@@ -0,0 +1,66 @@
+From 2a1240d57fe7518f118d8ccb70c08908657bb8ae Mon Sep 17 00:00:00 2001
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Tue, 17 Feb 2026 17:10:06 +0100
+Subject: [PATCH] s390/bpf: Do not increment tailcall count when prog is NULL
+
+Currently tail calling a non-existent prog results in tailcall count
+increment. This is what the interpreter is doing, but this is clearly
+wrong, so replace load-and-increment and compare-and-jump with load
+and compare-and-jump, conditionally followed by increment and store.
+
+Reported-by: Hari Bathini <hbathini@linux.ibm.com>
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+---
+ arch/s390/net/bpf_jit_comp.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
+index bf92964246eb..211226748662 100644
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -1862,20 +1862,21 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+ 				 jit->prg);
+ 
+ 		/*
+-		 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
++		 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+ 		 *         goto out;
++		 *
++		 * tail_call_cnt is read into %w0, which needs to be preserved
++		 * until it's incremented and flushed.
+ 		 */
+ 
+ 		off = jit->frame_off +
+ 		      offsetof(struct prog_frame, tail_call_cnt);
+-		/* lhi %w0,1 */
+-		EMIT4_IMM(0xa7080000, REG_W0, 1);
+-		/* laal %w1,%w0,off(%r15) */
+-		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
+-		/* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
++		/* ly %w0,off(%r15) */
++		EMIT6_DISP_LH(0xe3000000, 0x0058, REG_W0, REG_0, REG_15, off);
++		/* clij %w0,MAX_TAIL_CALL_CNT,0xa,out */
+ 		patch_2_clij = jit->prg;
+-		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
+-				 2, jit->prg);
++		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W0, MAX_TAIL_CALL_CNT,
++				 0xa, jit->prg);
+ 
+ 		/*
+ 		 * prog = array->ptrs[index];
+@@ -1894,6 +1895,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+ 		patch_3_brc = jit->prg;
+ 		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
+ 
++		/* tail_call_cnt++; */
++		/* ahi %w0,1 */
++		EMIT4_IMM(0xa70a0000, REG_W0, 1);
++		/* sty %w0,off(%r15) */
++		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, REG_0, REG_15, off);
++
+ 		/*
+ 		 * Restore registers before calling function
+ 		 */
+-- 
+2.53.0
+