diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index df1887fc..b0ffef5a 100755 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macos-latest] build_type: [Release, Debug] copiler_suite: [msvc, llvm, gnu] backend: [epoll, kqueue, iocp, iouring] @@ -39,6 +39,13 @@ jobs: - os: ubuntu-latest backend: iocp + - os: ubuntu-24.04-arm + copiler_suite: msvc + - os: ubuntu-24.04-arm + backend: kqueue + - os: ubuntu-24.04-arm + backend: iocp + - os: macos-latest copiler_suite: msvc - os: macos-latest @@ -102,6 +109,8 @@ jobs: fi cmake -B "${{ steps.strings.outputs.build-output-dir }}" \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DMG_ENABLE_BENCH=0 \ + -DMG_IS_CI=1 \ ${{ steps.strings.outputs.backend-cmake }} \ -S "${{ github.workspace }}" diff --git a/CMakeLists.txt b/CMakeLists.txt index 1de71971..7f48bce6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,9 @@ cmake_minimum_required (VERSION 3.8) project("ServerBox") +option(MG_ENABLE_TEST "Configure the tests" 1) +option(MG_ENABLE_BENCH "Configure the benchmarks" 0) + if (NOT DEFINED CMAKE_CXX_STANDARD) message(STATUS "Using C++20 standard as default") set(CMAKE_CXX_STANDARD 20) @@ -41,9 +44,9 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") endif () add_subdirectory(src) -if (NOT MG_SKIP_TEST) +if (MG_ENABLE_TEST) add_subdirectory(test) endif() -if (NOT MG_SKIP_BENCHES) +if (MG_ENABLE_BENCH) add_subdirectory(bench) endif() diff --git a/README.md b/README.md index c8fe2c99..7664e968 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ **Serverbox** is a framework for networking in C++. The purpose is similar to `boost::asio`. The focus is on these key points: -- **Simplicity** - the API is hard to misuse and is easy to understand. 
+- **Simplicity** - the API and algorithms are hard to misuse and easy to understand. - **Compactness** - the framework is small, both in code and binary size. - **Speed** - extreme optimizations and efficiency for run-time and even compile-time. - **Fairness** - huge accent on algorithms' fairness and even utilization of the CPU. @@ -13,7 +13,7 @@ Being tens if not hundreds of times smaller than Boost, this small framework out The framework consists of several modules implementing things most needed on the backend: network IO, task scheduling, fast data structures and containers, and some smaller utilities. -The core features in the framework are `IOCore` - a networking layer to accept clients, to send and receive data, and `TaskScheduler` - request processing engine. More about them below. +The core features in the framework are `IOCore` - a networking layer to accept clients, to send and receive data, and `TaskScheduler` - task processing engine. More about them below. ## `TaskScheduler` @@ -28,7 +28,7 @@ Task* t = new Task([](Task *self) { }); sched.Post(t); ``` -The `Task` object is a context which can be just deleted right after single callback invocation, or can be attached to your own data and re-used across multiple steps of your pipeline, and can be used for deadlines, wakeups, signaling, etc. +The `Task` object is a very light context (< 100 bytes) which can be just deleted right after a single callback invocation, or can be attached to your own data and re-used across multiple steps of your pipeline, and can be used for deadlines, wakeups, signaling, etc. 
It can also be used with C++20 coroutines: ```C++ @@ -42,6 +42,7 @@ t->SetCallback([](Task *self) -> mg::box::Coro { else printf("No signal"); co_await self->AsyncExitDelete(); + assert(!"unreachable"); co_return; }(t)); sched.Post(t); @@ -109,25 +110,26 @@ private: const Host myHost; }; ``` -`IOCore` core can be used as a task scheduler - `IOTask`s don't need to have sockets right from the start or at all. It can also be combined with `TaskScheduler` to execute your business logic in there, and do just IO in `IOCore`. +`IOCore` core can be used as a task scheduler - `IOTask`s don't need to have sockets right from the start or at all. It can also be combined with `TaskScheduler` to execute your business logic in there, and do just IO in `IOCore`. This is actually the preferable usage. ## Getting Started ### Dependencies -* At least C++11; +* At least C++17; * A standard C++ library. On non-Windows also need `pthread` library. * Compiler support: - Windows MSVC; - Clang; - GCC; -* OS should be any version of Linux, Windows, WSL, Mac. The actual testing was done on: - - Windows 10; - - WSLv1; - - Debian 4.19; - - MacOS Catalina 11.7.10; - - Ubuntu 22.04.4 LTS; -* Supports only architecture x86-64. On ARM it might work, but wasn't compiled nor tested (yet); +* Kernel support: + - Windows *(10 and later guaranteed)*; + - WSLv1 *(v2 as well, since this is basically a VM)*; + - Linux *(any version)*; + - MacOS *(earliest tested Catalina 11.7.10)*; +* Architecture support: + - x86 + - ARM * CMake. Compatible with `cmake` CLI and with VisualStudio CMake. ### Build and test @@ -136,12 +138,15 @@ private: It is possible to choose certain things at the CMake configuration stage. Each option can be given to CMake using `-D=` syntax. For example, `-DMG_AIO_USE_IOURING=1`. -* `MG_AIO_USE_IOURING` - 1 = enable `io_uring` on Linux, 0 = use `epoll`. Default is 0. -* `MG_BOOST_USE_IOURING` - 1 = enable `io_uring` on Linux for `boost` in the benchmarks, 0 = use `epoll`. Default is 0. 
+* `MG_ENABLE_TEST` - 1/0 = enable or disable tests compilation. Handy when building and installing it regularly and wanting to save time. **Default is 1**. +* `MG_ENABLE_BENCH` - 1/0 = same as above for benchmarks. Disabling them also makes sense because they might not be compatible with certain `boost` versions. **Default is 0**. +* `MG_AIO_USE_IOURING` - 1/0 = enable/disable `io_uring` on Linux, 0 = use `epoll`, 1 = use `io_uring`. **Default is 0**. +* `MG_BOOST_USE_IOURING` - 1/0 = same for `boost::asio` used in the benchmarks. **Default is 0**. +* `MG_IS_CI` - 1/0 = whether the build is running in CI. Used to reduce the duration of some tests which are more about perf than correctness. **Default is 0**. #### Visual Studio * Open VisualStudio; -* Select "Open a local folder"; +* Select *"Open a local folder"*; * Select `serverbox/` folder where the main `CMakeLists.txt` is located; * Wait for CMake parsing and configuration to complete; * Build and run as a normal VS project. @@ -170,7 +175,7 @@ Useful tips (for clean project, from the `build` folder created above): - Debug, no optimization at all:
`cmake -DCMAKE_BUILD_TYPE=Debug ../`; * Change C++ standard: - `cmake -DCMAKE_CXX_STANDARD=11/14/17/20/...`; + `cmake -DCMAKE_CXX_STANDARD=17/20/...`; ### Installation @@ -181,7 +186,7 @@ cd build cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$(pwd)/installed ../ make install ``` -This creates a folder `installed/` which contains the library binaries and headers. The libraries are self-sufficient and isolated. It means you can take just `TaskScheduler` headers and static library, or just `IOCore`'s, or just basic `libmgbox` and its headers. Or any combination of those. +This creates a subfolder `installed/` in the current directory, which contains the library binaries and headers. The libraries are self-sufficient and isolated. It means you can take just `TaskScheduler` headers and static library, or just `IOCore`'s, or just basic `libmgbox` and its headers. Or any combination of those. #### Stubs diff --git a/bench/README.md b/bench/README.md index 4e79b06f..579c88c4 100644 --- a/bench/README.md +++ b/bench/README.md @@ -1,6 +1,6 @@ # Benchmarks -See `result_...` folders for reports. +See `/results/` folders for reports. ## Method @@ -8,17 +8,17 @@ The idea is not just run some target features against artificial load. The point The benchmarks are implemented in multiple executables. Each exe uses one implementation of a certain feature. -For example, take task scheduler feature. There are 2 exes: +For example, take `TaskScheduler`. There are 2 exes: * `bench_taskscheduler`. It runs `TaskScheduler`, the real task scheduler with all its features. * `bench_taskscheduler_trivial`. It runs an alternative task scheduler implemented in a very trivial way, extremely simple. It lacks most features but is quite fast in some scenarios. -Both exes run exactly the same bench scenarios, but using different scheduler implementations. The same works for other features. +Both exes run exactly the same bench scenarios, but using different scheduler implementations. 
The same works for other features. For instance, `IOCore` is compared with `boost::asio`; the lock-free queues are compared with trivial mutex-locked queues. ## Running The executables can be run locally either directly or via a script. -**Direct** run is just starting the exe, providing the parameters, observing the output. The parameters better see in the code. Can run individual tests, or can run one of them multiple times and get aggregated info printed. Like min/median/max values of a target metric. +**Direct** run is just starting the exe, providing the parameters, observing the output. The parameters are best seen in the code, or by looking at which ones are passed via `config.json`. Can run individual tests, or can run one of them multiple times and get aggregated info printed. Like min/median/max values of a target metric. -**Script** is how to run extra many benchmarks and compare different implementations. The script `report.py` takes a JSON config which provides exes and scenarios to test; runs the exes on all scenarios; generates a markdown report. Like the ones stored in `result_...` folders. The easiest way to understand how to run it is to look at the example configs and at the source code. +**Script** is how to run many benchmarks with multiple scenarios and compare different implementations. The script `report.py` takes a JSON config which provides exes and scenarios to test (see `report.py --help`); runs the exes on all scenarios; generates a markdown report. Like the ones stored in `results` folders. The easiest way to understand how to run it is to look at the example configs and at the source code. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b203f3f0..aafbb903 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -47,8 +47,8 @@ add_custom_target(install_serverbox -S ${CMAKE_SOURCE_DIR}/.. 
-B ${MG_SERVERBOX_BUILD_DIR} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DMG_SKIP_BENCHES=1 - -DMG_SKIP_TEST=1 + -DMG_ENABLE_TEST=0 + -DMG_ENABLE_BENCH=0 -DCMAKE_INSTALL_PREFIX=${MG_SERVERBOX_DIR} COMMAND ${CMAKE_COMMAND} diff --git a/src/mg/sch/README.md b/src/mg/sch/README.md index e69439c3..c7b3996c 100644 --- a/src/mg/sch/README.md +++ b/src/mg/sch/README.md @@ -24,6 +24,7 @@ t->SetCallback([](Task *self) -> mg::box::Coro { else printf("No signal"); co_await self->AsyncExitDelete(); + assert(!"unreachable"); co_return; }(t)); sched.Post(t); @@ -53,7 +54,7 @@ The performance depends on thread count and task durations. An example of how it - 2 worker threads; - A task body takes up to tens of nanoseconds, quite short. Worst case scenario for contention. -That gives **more than 5 millions of tasks per second**. That isn't the top perf, just a regular example. For more info see `bench` folder with detailed reports and if want to run them yourself. +That gives **more than 5 million tasks per second** on quite a weak CPU. That isn't the top perf, just a regular example. For more info see the `bench` folder with detailed reports, and how to run them yourself. @@ -102,8 +103,8 @@ private: myTask.SetCallback(this, &MyCoroutine::PrivStep2); // Post self again when complete. - myClient.Get(firstUrl, [&sched, aTask]() { - sched.Post(aTask); + myClient.Get(firstUrl, [aTask]() { + TaskScheduler::This().Post(aTask); }); } @@ -116,8 +117,8 @@ private: myTask.SetCallback(this, &MyCoroutine::PrivStep3); // Post self again when complete. 
- myClient.Head(secondUrl, [&sched, aTask]() { - sched.Post(aTask); + myClient.Head(secondUrl, [aTask]() { + TaskScheduler::This().Post(aTask); }); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3754b641..57853010 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,13 @@ cmake_minimum_required (VERSION 3.8) +option(MG_IS_CI "Whether the build is running in CI" 0) + +if (MG_IS_CI) + add_compile_definitions(MG_IS_CI=1) +else() + add_compile_definitions(MG_IS_CI=0) +endif() + add_executable(test main.cpp UnitTest.cpp diff --git a/test/box/UnitTestThreadLocalPool.cpp b/test/box/UnitTestThreadLocalPool.cpp index cf68486c..e693c0c3 100644 --- a/test/box/UnitTestThreadLocalPool.cpp +++ b/test/box/UnitTestThreadLocalPool.cpp @@ -115,7 +115,13 @@ namespace threadlocalpool { constexpr uint32_t threadCount = 5; constexpr uint32_t valueCount = 100000; +#if MG_IS_CI + // The default workload runs for 10 minutes in GitHub CI on Windows Debug - twice + // as long as the complete job of any other run config. Let's make it smaller. + constexpr uint32_t iterCount = 20; +#else constexpr uint32_t iterCount = 200; +#endif std::vector threads; threads.reserve(threadCount); mg::box::ConditionVariable condVar;